mirror of
https://github.com/vlang/v.git
synced 2023-08-10 21:13:21 +03:00
282 lines
7.2 KiB
V
282 lines
7.2 KiB
V
// Copyright (c) 2019-2022 Alexander Medvednikov. All rights reserved.
|
|
// Use of this source code is governed by an MIT license
|
|
// that can be found in the LICENSE file.
|
|
module scanner
|
|
|
|
import v.token
|
|
import v.pref
|
|
|
|
fn scan_kinds(text string) []token.Kind {
|
|
mut scanner := new_scanner(text, .skip_comments, &pref.Preferences{})
|
|
mut token_kinds := []token.Kind{}
|
|
for {
|
|
tok := scanner.scan()
|
|
if tok.kind == .eof {
|
|
break
|
|
}
|
|
token_kinds << tok.kind
|
|
}
|
|
return token_kinds
|
|
}
|
|
|
|
fn scan_tokens(text string) []token.Token {
|
|
mut scanner := new_scanner(text, .parse_comments, &pref.Preferences{})
|
|
mut tokens := []token.Token{}
|
|
for {
|
|
tok := scanner.scan()
|
|
if tok.kind == .eof {
|
|
break
|
|
}
|
|
tokens << tok
|
|
}
|
|
return tokens
|
|
}
|
|
|
|
fn test_scan() {
|
|
token_kinds := scan_kinds('println(2 + 3)')
|
|
assert token_kinds.len == 6
|
|
assert token_kinds[0] == .name
|
|
assert token_kinds[1] == .lpar
|
|
assert token_kinds[2] == .number
|
|
assert token_kinds[3] == .plus
|
|
assert token_kinds[4] == .number
|
|
assert token_kinds[5] == .rpar
|
|
}
|
|
|
|
fn test_number_constant_input_format() {
|
|
mut c := 0xa0
|
|
assert c == 0xa0
|
|
c = 0b1001
|
|
assert c == 9
|
|
c = 1000000
|
|
assert c == 1000000
|
|
}
|
|
|
|
fn test_float_conversion_and_reading() {
|
|
d := 23000000e-3
|
|
assert int(d) == 23000
|
|
mut e := 1.2E3 * -1e-1
|
|
assert e == -120.0
|
|
e = 1.2E3 * 1e-1
|
|
x := 55.0
|
|
assert e == 120.0
|
|
assert 1.23e+10 == 1.23e10
|
|
assert 1.23e+10 == 1.23e0010
|
|
assert (-1.23e+10) == (1.23e0010 * -1.0)
|
|
assert x == 55.0
|
|
}
|
|
|
|
fn test_float_without_fraction() {
|
|
mut result := scan_kinds('x := 10.0')
|
|
assert result.len == 3
|
|
assert result[0] == .name
|
|
assert result[1] == .decl_assign
|
|
assert result[2] == .number
|
|
result = scan_kinds('return 3.0, 4.0')
|
|
assert result.len == 4
|
|
assert result[0] == .key_return
|
|
assert result[1] == .number
|
|
assert result[2] == .comma
|
|
assert result[3] == .number
|
|
result = scan_kinds('fun(5.0)')
|
|
assert result.len == 4
|
|
assert result[0] == .name
|
|
assert result[1] == .lpar
|
|
assert result[2] == .number
|
|
assert result[3] == .rpar
|
|
}
|
|
|
|
fn test_reference_bools() {
|
|
result := scan_kinds('true && false')
|
|
assert result.len == 3
|
|
assert result[0] == .key_true
|
|
assert result[1] == .and
|
|
assert result[2] == .key_false
|
|
}
|
|
|
|
fn test_reference_var() {
|
|
result := scan_kinds('&foo')
|
|
assert result.len == 2
|
|
assert result[0] == .amp
|
|
assert result[1] == .name
|
|
}
|
|
|
|
fn test_array_of_references() {
|
|
result := scan_kinds('[]&foo')
|
|
assert result.len == 4
|
|
assert result[0] == .lsbr
|
|
assert result[1] == .rsbr
|
|
assert result[2] == .amp
|
|
assert result[3] == .name
|
|
}
|
|
|
|
fn test_ref_array_of_references() {
|
|
result := scan_kinds('&[]&foo')
|
|
assert result.len == 5
|
|
assert result[0] == .amp
|
|
assert result[1] == .lsbr
|
|
assert result[2] == .rsbr
|
|
assert result[3] == .amp
|
|
assert result[4] == .name
|
|
}
|
|
|
|
fn test_ref_ref_foo() {
|
|
result := scan_kinds('&&foo')
|
|
assert result.len == 3
|
|
assert result[0] == .amp
|
|
assert result[1] == .amp
|
|
assert result[2] == .name
|
|
}
|
|
|
|
fn test_array_of_ref_ref_foo() {
|
|
result := scan_kinds('[]&&foo')
|
|
assert result.len == 5
|
|
assert result[0] == .lsbr
|
|
assert result[1] == .rsbr
|
|
assert result[2] == .amp
|
|
assert result[3] == .amp
|
|
assert result[4] == .name
|
|
}
|
|
|
|
fn test_ref_ref_array_ref_ref_foo() {
|
|
result := scan_kinds('&&[]&&foo')
|
|
assert result.len == 7
|
|
assert result[0] == .amp
|
|
assert result[1] == .amp
|
|
assert result[2] == .lsbr
|
|
assert result[3] == .rsbr
|
|
assert result[4] == .amp
|
|
assert result[5] == .amp
|
|
assert result[6] == .name
|
|
}
|
|
|
|
fn test_escape_rune() {
|
|
// these lines work if the v compiler is working
|
|
// will not work until v compiler on github is updated
|
|
// assert `\x61` == `a`
|
|
// assert `\u0061` == `a`
|
|
|
|
// will not work until PR is accepted
|
|
// assert `\141` == `a`
|
|
// assert `\xe2\x98\x85` == `★`
|
|
// assert `\342\230\205` == `★`
|
|
|
|
// the following lines test the scanner module
|
|
// even before it is compiled into the v executable
|
|
|
|
// SINGLE CHAR ESCAPES
|
|
// SINGLE CHAR APOSTROPHE
|
|
mut result := scan_tokens(r"`'`")
|
|
assert result[0].kind == .chartoken
|
|
assert result[0].lit == r"\'"
|
|
|
|
// SINGLE CHAR BACKTICK
|
|
result = scan_tokens(r'`\``')
|
|
assert result[0].kind == .chartoken
|
|
assert result[0].lit == r'\`'
|
|
|
|
// SINGLE CHAR SLASH
|
|
result = scan_tokens(r'`\\`')
|
|
assert result[0].kind == .chartoken
|
|
assert result[0].lit == r'\\'
|
|
|
|
// SINGLE CHAR UNICODE ESCAPE
|
|
result = scan_tokens(r'`\u2605`')
|
|
assert result[0].kind == .chartoken
|
|
assert result[0].lit == r'★'
|
|
|
|
// SINGLE CHAR ESCAPED ASCII
|
|
result = scan_tokens(r'`\x61`')
|
|
assert result[0].kind == .chartoken
|
|
assert result[0].lit == r'a'
|
|
|
|
// SINGLE CHAR INCORRECT ESCAPE
|
|
// result = scan_tokens(r'`\x61\x61`') // should always result in an error
|
|
|
|
// SINGLE CHAR MULTI-BYTE UTF-8 (hex)
|
|
result = scan_tokens(r'`\xe2\x98\x85`')
|
|
assert result[0].lit == r'★'
|
|
|
|
// SINGLE CHAR MULTI-BYTE UTF-8 (octal)
|
|
result = scan_tokens(r'`\342\230\205`')
|
|
assert result[0].lit == r'★'
|
|
}
|
|
|
|
fn test_escape_string() {
|
|
// these lines work if the v compiler is working
|
|
assert '\x61' == 'a'
|
|
assert '\x62' == 'b'
|
|
assert '\u0061' == 'a'
|
|
assert '\141' == 'a'
|
|
assert '\xe2\x98\x85' == '★'
|
|
assert '\342\230\205' == '★'
|
|
|
|
// the following lines test the scanner module
|
|
// even before it is compiled into the v executable
|
|
|
|
// STRING ESCAPES =================
|
|
// STRING APOSTROPHE
|
|
mut result := scan_tokens(r"'\''")
|
|
assert result[0].kind == .string
|
|
assert result[0].lit == r"\'"
|
|
|
|
// STRING BACKTICK
|
|
result = scan_tokens(r"'\`'")
|
|
assert result[0].kind == .string
|
|
assert result[0].lit == r'\`'
|
|
|
|
// STRING SLASH
|
|
result = scan_tokens(r"'\\'")
|
|
assert result[0].kind == .string
|
|
assert result[0].lit == r'\\'
|
|
|
|
// STRING UNICODE ESCAPE
|
|
result = scan_tokens(r"'\u2605'")
|
|
assert result[0].kind == .string
|
|
assert result[0].lit == r'★'
|
|
result = scan_tokens(r"'H\u2605H'")
|
|
assert result[0].kind == .string
|
|
assert result[0].lit == r'H★H'
|
|
|
|
// STRING ESCAPED ASCII
|
|
result = scan_tokens(r"'\x61'")
|
|
assert result[0].kind == .string
|
|
assert result[0].lit == r'a'
|
|
|
|
// STRING ESCAPED EXTENDED ASCII
|
|
// (should not be converted to unicode)
|
|
result = scan_tokens(r"'\xe29885'")
|
|
assert result[0].kind == .string
|
|
assert result[0].lit.bytes() == [u8(0xe2), `9`, `8`, `8`, `5`]
|
|
|
|
// MIX STRING ESCAPES
|
|
result = scan_tokens(r"'\x61\u2605'")
|
|
assert result[0].kind == .string
|
|
assert result[0].lit == r'a★'
|
|
result = scan_tokens(r"'\u2605\x61'")
|
|
assert result[0].kind == .string
|
|
assert result[0].lit == r'★a'
|
|
|
|
// MIX STRING ESCAPES with offset
|
|
result = scan_tokens(r"'x \x61\u2605\x61'")
|
|
assert result[0].kind == .string
|
|
assert result[0].lit == r'x a★a'
|
|
result = scan_tokens(r"'x \u2605\x61\u2605'")
|
|
assert result[0].kind == .string
|
|
assert result[0].lit == r'x ★a★'
|
|
|
|
// SHOULD RESULT IN ERRORS
|
|
// result = scan_tokens(r'`\x61\x61`') // should always result in an error
|
|
// result = scan_tokens(r"'\x'") // should always result in an error
|
|
// result = scan_tokens(r'`hello`') // should always result in an error
|
|
}
|
|
|
|
fn test_comment_string() {
|
|
mut result := scan_tokens('// single line comment will get an \\x01 prepended')
|
|
assert result[0].kind == .comment
|
|
assert result[0].lit[0] == u8(1) // \x01
|
|
// result = scan_tokens('/// doc comment will keep third / at beginning')
|
|
// result = scan_tokens('/* block comment will be stripped of whitespace */')
|
|
// result = scan_tokens('a := 0 // line end comment also gets \\x01 prepended')
|
|
}
|