mirror of
https://github.com/vlang/v.git
synced 2023-08-10 21:13:21 +03:00
scanner: multibyte rune literals now support unicode, hex, and octal escape codes (#13140)
This commit is contained in:
parent
bb6c46e1ef
commit
7a2705d8ce
@ -16,6 +16,11 @@ indent_size = 2
|
|||||||
|
|
||||||
[*.md]
|
[*.md]
|
||||||
trim_trailing_whitespace = false
|
trim_trailing_whitespace = false
|
||||||
|
# lines that are too long will trigger an error in cmd/tools/vcheck-md.v
|
||||||
|
# run v check-md [folder/file] to test markdown files
|
||||||
|
# the longest normal line is specified with this constant:
|
||||||
|
# `too_long_line_length_other = 100`
|
||||||
|
max_line_length = 100
|
||||||
|
|
||||||
[*.{txt,out}]
|
[*.{txt,out}]
|
||||||
insert_final_newline = false
|
insert_final_newline = false
|
||||||
|
192
doc/docs.md
192
doc/docs.md
@ -476,16 +476,33 @@ d := b + x // d is of type `f64` - automatic promotion of `x`'s value
|
|||||||
|
|
||||||
### Strings
|
### Strings
|
||||||
|
|
||||||
```v
|
```v nofmt
|
||||||
name := 'Bob'
|
name := 'Bob'
|
||||||
println(name.len)
|
assert name.len == 3 // the length of the string in bytes is 3
|
||||||
println(name[0]) // indexing gives a byte B
|
assert name[0] == byte(66) // indexing gives a byte, byte(66) == `B`
|
||||||
println(name[1..3]) // slicing gives a string 'ob'
|
assert name[1..3] == 'ob' // slicing gives a string 'ob'
|
||||||
|
|
||||||
|
// escape codes
|
||||||
windows_newline := '\r\n' // escape special characters like in C
|
windows_newline := '\r\n' // escape special characters like in C
|
||||||
assert windows_newline.len == 2
|
assert windows_newline.len == 2
|
||||||
|
|
||||||
|
// arbitrary bytes can be directly specified using `\x##` notation where `#` is
|
||||||
|
// a hex digit
|
||||||
|
aardvark_str := '\x61ardvark'
|
||||||
|
assert aardvark_str == 'aardvark'
|
||||||
|
assert '\xc0'[0] == byte(0xc0)
|
||||||
|
|
||||||
|
// or using octal escape `\###` notation where `#` is an octal digit
|
||||||
|
aardvark_str2 := '\141ardvark'
|
||||||
|
assert aardvark_str2 == 'aardvark'
|
||||||
|
|
||||||
|
// Unicode can be specified directly as `\u####` where # is a hex digit
|
||||||
|
// and will be converted internally to its UTF-8 representation
|
||||||
|
star_str := '\u2605' // ★
|
||||||
|
assert star_str == '★'
|
||||||
|
assert star_str == '\xe2\x98\x85' // UTF-8 can be specified this way too.
|
||||||
```
|
```
|
||||||
|
|
||||||
In V, a string is a read-only array of bytes. String data is encoded using UTF-8:
|
In V, a string is a read-only array of bytes. All Unicode characters are encoded using UTF-8:
|
||||||
|
|
||||||
```v
|
```v
|
||||||
s := 'hello 🌎' // emoji takes 4 bytes
|
s := 'hello 🌎' // emoji takes 4 bytes
|
||||||
assert s.len == 10
|
assert s.len == 10
|
||||||
@ -503,11 +520,12 @@ String values are immutable. You cannot mutate elements:
|
|||||||
mut s := 'hello 🌎'
|
mut s := 'hello 🌎'
|
||||||
s[0] = `H` // not allowed
|
s[0] = `H` // not allowed
|
||||||
```
|
```
|
||||||
|
|
||||||
> error: cannot assign to `s[i]` since V strings are immutable
|
> error: cannot assign to `s[i]` since V strings are immutable
|
||||||
|
|
||||||
Note that indexing a string will produce a `byte`, not a `rune` nor another `string`.
|
Note that indexing a string will produce a `byte`, not a `rune` nor another `string`. Indexes
|
||||||
Indexes correspond to bytes in the string, not Unicode code points. If you want to
|
correspond to _bytes_ in the string, not Unicode code points. If you want to convert the `byte` to a
|
||||||
convert the `byte` to a `string`, use the `ascii_str()` method:
|
`string`, use the `.ascii_str()` method on the `byte`:
|
||||||
|
|
||||||
```v
|
```v
|
||||||
country := 'Netherlands'
|
country := 'Netherlands'
|
||||||
@ -515,20 +533,13 @@ println(country[0]) // Output: 78
|
|||||||
println(country[0].ascii_str()) // Output: N
|
println(country[0].ascii_str()) // Output: N
|
||||||
```
|
```
|
||||||
|
|
||||||
Character literals have type `rune`. To denote them, use `
|
Both single and double quotes can be used to denote strings. For consistency, `vfmt` converts double
|
||||||
|
quotes to single quotes unless the string contains a single quote character.
|
||||||
|
|
||||||
|
For raw strings, prepend `r`. Escape handling is not done for raw strings:
|
||||||
|
|
||||||
```v
|
```v
|
||||||
rocket := `🚀`
|
s := r'hello\nworld' // the `\n` will be preserved as two characters
|
||||||
assert 'aloha!'[0] == `a`
|
|
||||||
```
|
|
||||||
|
|
||||||
Both single and double quotes can be used to denote strings. For consistency,
|
|
||||||
`vfmt` converts double quotes to single quotes unless the string contains a single quote character.
|
|
||||||
|
|
||||||
For raw strings, prepend `r`. Raw strings are not escaped:
|
|
||||||
|
|
||||||
```v
|
|
||||||
s := r'hello\nworld'
|
|
||||||
println(s) // "hello\nworld"
|
println(s) // "hello\nworld"
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -537,41 +548,79 @@ Strings can be easily converted to integers:
|
|||||||
```v
|
```v
|
||||||
s := '42'
|
s := '42'
|
||||||
n := s.int() // 42
|
n := s.int() // 42
|
||||||
|
|
||||||
|
// all int literals are supported
|
||||||
|
assert '0xc3'.int() == 195
|
||||||
|
assert '0o10'.int() == 8
|
||||||
|
assert '0b1111_0000_1010'.int() == 3850
|
||||||
|
assert '-0b1111_0000_1010'.int() == -3850
|
||||||
```
|
```
|
||||||
|
|
||||||
### Runes
|
For more advanced `string` processing and conversions, refer to the
|
||||||
A `rune` represents a unicode character and is an alias for `u32`. Runes can be created like this:
|
[vlib/strconv](https://modules.vlang.io/strconv.html) module.
|
||||||
```v
|
|
||||||
x := `🚀`
|
|
||||||
```
|
|
||||||
|
|
||||||
A string can be converted to runes by the `.runes()` method.
|
|
||||||
```v
|
|
||||||
hello := 'Hello World 👋'
|
|
||||||
hello_runes := hello.runes() // [`H`, `e`, `l`, `l`, `o`, ` `, `W`, `o`, `r`, `l`, `d`, ` `, `👋`]
|
|
||||||
```
|
|
||||||
|
|
||||||
### String interpolation
|
### String interpolation
|
||||||
|
|
||||||
Basic interpolation syntax is pretty simple - use `$` before a variable name.
|
Basic interpolation syntax is pretty simple - use `$` before a variable name. The variable will be
|
||||||
The variable will be converted to a string and embedded into the literal:
|
converted to a string and embedded into the literal:
|
||||||
|
|
||||||
```v
|
```v
|
||||||
name := 'Bob'
|
name := 'Bob'
|
||||||
println('Hello, $name!') // Hello, Bob!
|
println('Hello, $name!') // Hello, Bob!
|
||||||
```
|
```
|
||||||
It also works with fields: `'age = $user.age'`.
|
|
||||||
If you need more complex expressions, use `${}`: `'can register = ${user.age > 13}'`.
|
|
||||||
|
|
||||||
Format specifiers similar to those in C's `printf()` are also supported.
|
It also works with fields: `'age = $user.age'`. If you need more complex expressions, use `${}`:
|
||||||
`f`, `g`, `x`, etc. are optional and specify the output format.
|
`'can register = ${user.age > 13}'`.
|
||||||
The compiler takes care of the storage size, so there is no `hd` or `llu`.
|
|
||||||
|
Format specifiers similar to those in C's `printf()` are also supported. `f`, `g`, `x`, `o`, `b`,
|
||||||
|
etc. are optional and specify the output format. The compiler takes care of the storage size, so
|
||||||
|
there is no `hd` or `llu`.
|
||||||
|
|
||||||
|
To use a format specifier, follow this pattern:
|
||||||
|
|
||||||
|
`${varname:[flags][width][.precision][type]}`
|
||||||
|
|
||||||
|
- flags: may be zero or more of the following: `-` to left-align output within the field, `0` to use
|
||||||
|
`0` as the padding character instead of the default `space` character. (Note: V does not currently
|
||||||
|
support the use of `'` or `#` as format flags, and V supports but doesn't need `+` to right-align
|
||||||
|
since that's the default.)
|
||||||
|
- width: may be an integer value describing the minimum width of total field to output.
|
||||||
|
- precision: an integer value preceded by a `.` will guarantee that many digits after the decimal
|
||||||
|
point, if the input variable is a float. Ignored if variable is an integer.
|
||||||
|
- type: `f` and `F` specify the input is a float and should be rendered as such, `e` and `E` specify
|
||||||
|
the input is a float and should be rendered as an exponent (partially broken), `g` and `G` specify
|
||||||
|
the input is a float--the renderer will use floating point notation for small values and exponent
|
||||||
|
notation for large values, `d` specifies the input is an integer and should be rendered in base-10
|
||||||
|
digits, `x` and `X` require an integer and will render it as hexadecimal digits, `o` requires an
|
||||||
|
integer and will render it as octal digits, `b` requires an integer and will render it as binary
|
||||||
|
digits, `s` requires a string (almost never used).
|
||||||
|
|
||||||
|
Note: when a numeric type can render alphabetic characters, such as hex strings or special values
|
||||||
|
like `infinity`, the lowercase version of the type forces lowercase alphabetics and the uppercase
|
||||||
|
version forces uppercase alphabetics.
|
||||||
|
|
||||||
|
Also note: in most cases, it's best to leave the format type empty. Floats will be rendered by
|
||||||
|
default as `g`, integers will be rendered by default as `d`, and `s` is almost always redundant.
|
||||||
|
There are only three cases where specifying a type is recommended:
|
||||||
|
|
||||||
|
- format strings are parsed at compile time, so specifying a type can help detect errors then
|
||||||
|
- format strings default to using lowercase letters for hex digits and the `e` in exponents. Use an
|
||||||
|
uppercase type to force the use of uppercase hex digits and an uppercase `E` in exponents.
|
||||||
|
- format strings are the most convenient way to get hex, binary or octal strings from an integer.
|
||||||
|
|
||||||
|
See
|
||||||
|
[Format Placeholder Specification](https://en.wikipedia.org/wiki/Printf_format_string#Format_placeholder_specification)
|
||||||
|
for more information.
|
||||||
|
|
||||||
```v
|
```v
|
||||||
x := 123.4567
|
x := 123.4567
|
||||||
println('x = ${x:4.2f}')
|
println('[${x:.2}]') // round to two decimal places => [123.46]
|
||||||
println('[${x:10}]') // pad with spaces on the left => [ 123.457]
|
println('[${x:10}]') // right-align with spaces on the left => [ 123.457]
|
||||||
println('[${int(x):-10}]') // pad with spaces on the right => [123 ]
|
println('[${int(x):-10}]') // left-align with spaces on the right => [123 ]
|
||||||
println('[${int(x):010}]') // pad with zeros on the left => [0000000123]
|
println('[${int(x):010}]') // pad with zeros on the left => [0000000123]
|
||||||
|
println('[${int(x):b}]') // output as binary => [1111011]
|
||||||
|
println('[${int(x):o}]') // output as octal => [173]
|
||||||
|
println('[${int(x):X}]') // output as uppercase hex => [7B]
|
||||||
```
|
```
|
||||||
|
|
||||||
### String operators
|
### String operators
|
||||||
@ -585,13 +634,14 @@ s += 'world' // `+=` is used to append to a string
|
|||||||
println(s) // "hello world"
|
println(s) // "hello world"
|
||||||
```
|
```
|
||||||
|
|
||||||
All operators in V must have values of the same type on both sides.
|
All operators in V must have values of the same type on both sides. You cannot concatenate an
|
||||||
You cannot concatenate an integer to a string:
|
integer to a string:
|
||||||
|
|
||||||
```v failcompile
|
```v failcompile
|
||||||
age := 10
|
age := 10
|
||||||
println('age = ' + age) // not allowed
|
println('age = ' + age) // not allowed
|
||||||
```
|
```
|
||||||
|
|
||||||
> error: infix expr: cannot use `int` (right expression) as `string`
|
> error: infix expr: cannot use `int` (right expression) as `string`
|
||||||
|
|
||||||
We have to either convert `age` to a `string`:
|
We have to either convert `age` to a `string`:
|
||||||
@ -608,6 +658,62 @@ age := 12
|
|||||||
println('age = $age')
|
println('age = $age')
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Runes
|
||||||
|
|
||||||
|
A `rune` represents a single Unicode character and is an alias for `u32`. To denote them, use `
|
||||||
|
(backticks):
|
||||||
|
|
||||||
|
```v
|
||||||
|
rocket := `🚀`
|
||||||
|
```
|
||||||
|
|
||||||
|
A `rune` can be converted to a UTF-8 string by using the `.str()` method.
|
||||||
|
|
||||||
|
```v
|
||||||
|
rocket := `🚀`
|
||||||
|
assert rocket.str() == '🚀'
|
||||||
|
```
|
||||||
|
|
||||||
|
A `rune` can be converted to UTF-8 bytes by using the `.bytes()` method.
|
||||||
|
|
||||||
|
```v
|
||||||
|
rocket := `🚀`
|
||||||
|
assert rocket.bytes() == [byte(0xf0), 0x9f, 0x9a, 0x80]
|
||||||
|
```
|
||||||
|
|
||||||
|
Hex, Unicode, and Octal escape sequences also work in a `rune` literal:
|
||||||
|
|
||||||
|
```v
|
||||||
|
assert `\x61` == `a`
|
||||||
|
assert `\141` == `a`
|
||||||
|
assert `\u0061` == `a`
|
||||||
|
|
||||||
|
// multibyte literals work too
|
||||||
|
assert `\u2605` == `★`
|
||||||
|
assert `\u2605`.bytes() == [byte(0xe2), 0x98, 0x85]
|
||||||
|
assert `\xe2\x98\x85`.bytes() == [byte(0xe2), 0x98, 0x85]
|
||||||
|
assert `\342\230\205`.bytes() == [byte(0xe2), 0x98, 0x85]
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that `rune` literals use the same escape syntax as strings, but they can only hold one Unicode
|
||||||
|
character. Therefore, if your code does not specify a single Unicode character, you will receive an
|
||||||
|
error at compile time.
|
||||||
|
|
||||||
|
Also remember that strings are indexed as bytes, not runes, so beware:
|
||||||
|
|
||||||
|
```v
|
||||||
|
rocket_string := '🚀'
|
||||||
|
assert rocket_string[0] != `🚀`
|
||||||
|
assert 'aloha!'[0] == `a`
|
||||||
|
```
|
||||||
|
|
||||||
|
A string can be converted to runes by the `.runes()` method.
|
||||||
|
|
||||||
|
```v
|
||||||
|
hello := 'Hello World 👋'
|
||||||
|
hello_runes := hello.runes() // [`H`, `e`, `l`, `l`, `o`, ` `, `W`, `o`, `r`, `l`, `d`, ` `, `👋`]
|
||||||
|
```
|
||||||
|
|
||||||
### Numbers
|
### Numbers
|
||||||
|
|
||||||
```v
|
```v
|
||||||
|
0
examples/v_script.vsh
Normal file → Executable file
0
examples/v_script.vsh
Normal file → Executable file
@ -1307,6 +1307,28 @@ fn decode_h_escapes(s string, start int, escapes_pos []int) string {
|
|||||||
return ss.join('')
|
return ss.join('')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// decode_o_escapes replaces each octal escape of the form '\###' (assumed to be exactly 4 characters long) located at the positions in `escapes_pos` with the single byte it denotes; `start` is subtracted from every position to index into `s`, and `s` is returned unchanged when `escapes_pos` is empty
|
||||||
|
fn decode_o_escapes(s string, start int, escapes_pos []int) string {
|
||||||
|
if escapes_pos.len == 0 {
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
mut ss := []string{cap: escapes_pos.len}
|
||||||
|
ss << s[..escapes_pos.first() - start] // everything before the first escape code position
|
||||||
|
for i, pos in escapes_pos {
|
||||||
|
idx := pos - start
|
||||||
|
end_idx := idx + 4 // "\XXX".len == 4
|
||||||
|
// no UTF-8 decoding happens here: the three octal digits are parsed as a base-8, 8-bit value and emitted as one raw byte (e.g. '\141' becomes the byte 0o141); if parsing fails, the byte 0 is emitted instead
|
||||||
|
ss << [byte(strconv.parse_uint(s[idx + 1..end_idx], 8, 8) or { 0 })].bytestr()
|
||||||
|
if i + 1 < escapes_pos.len {
|
||||||
|
ss << s[end_idx..escapes_pos[i + 1] - start]
|
||||||
|
} else {
|
||||||
|
ss << s[end_idx..]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ss.join('')
|
||||||
|
}
|
||||||
|
|
||||||
|
// decode the flagged unicode escape sequences into their utf-8 bytes
|
||||||
fn decode_u_escapes(s string, start int, escapes_pos []int) string {
|
fn decode_u_escapes(s string, start int, escapes_pos []int) string {
|
||||||
if escapes_pos.len == 0 {
|
if escapes_pos.len == 0 {
|
||||||
return s
|
return s
|
||||||
@ -1348,9 +1370,10 @@ fn trim_slash_line_break(s string) string {
|
|||||||
/// possibilities:
|
/// possibilities:
|
||||||
/// single chars like `a`, `b` => 'a', 'b'
|
/// single chars like `a`, `b` => 'a', 'b'
|
||||||
/// escaped single chars like `\\`, `\``, `\n` => '\\', '`', '\n'
|
/// escaped single chars like `\\`, `\``, `\n` => '\\', '`', '\n'
|
||||||
/// escaped hex bytes like `\x01`, `\x61` => '\x01', 'a'
|
/// escaped single hex bytes like `\x01`, `\x61` => '\x01', 'a'
|
||||||
/// escaped multibyte runes like `\xe29885` => (★)
|
|
||||||
/// escaped unicode literals like `\u2605`
|
/// escaped unicode literals like `\u2605`
|
||||||
|
/// escaped utf8 runes in hex like `\xe2\x98\x85` => (★)
|
||||||
|
/// escaped utf8 runes in octal like `\342\230\205` => (★)
|
||||||
fn (mut s Scanner) ident_char() string {
|
fn (mut s Scanner) ident_char() string {
|
||||||
lspos := token.Position{
|
lspos := token.Position{
|
||||||
line_nr: s.line_nr
|
line_nr: s.line_nr
|
||||||
@ -1365,6 +1388,7 @@ fn (mut s Scanner) ident_char() string {
|
|||||||
// set flags for advanced escapes first
|
// set flags for advanced escapes first
|
||||||
escaped_hex := s.expect('\\x', start + 1)
|
escaped_hex := s.expect('\\x', start + 1)
|
||||||
escaped_unicode := s.expect('\\u', start + 1)
|
escaped_unicode := s.expect('\\u', start + 1)
|
||||||
|
escaped_octal := !escaped_hex && !escaped_unicode && s.expect('\\', start + 1)
|
||||||
|
|
||||||
// walk the string to get characters up to the next backtick
|
// walk the string to get characters up to the next backtick
|
||||||
for {
|
for {
|
||||||
@ -1390,65 +1414,40 @@ fn (mut s Scanner) ident_char() string {
|
|||||||
return c
|
return c
|
||||||
}
|
}
|
||||||
if len != 1 {
|
if len != 1 {
|
||||||
|
// the string inside the backticks is longer than one character
|
||||||
|
// but we might only have one rune... attempt to decode escapes
|
||||||
// if the content expresses an escape code, it will have an even number of characters
|
// if the content expresses an escape code, it will have an even number of characters
|
||||||
// e.g. \x61 or \u2605
|
// e.g. (octal) \141 (hex) \x61 or (unicode) \u2605
|
||||||
if (c.len % 2 == 0) && (escaped_hex || escaped_unicode) {
|
// we don't handle binary escape codes in rune literals
|
||||||
|
orig := c
|
||||||
|
if (c.len % 2 == 0) && (escaped_hex || escaped_unicode || escaped_octal) {
|
||||||
if escaped_unicode {
|
if escaped_unicode {
|
||||||
|
// there can only be one, so attempt to decode it now
|
||||||
c = decode_u_escapes(c, 0, [0])
|
c = decode_u_escapes(c, 0, [0])
|
||||||
} else {
|
} else {
|
||||||
// we have to handle hex ourselves
|
// find escape sequence start positions
|
||||||
ascii_0 := byte(0x30)
|
mut escapes_pos := []int{}
|
||||||
ascii_a := byte(0x61)
|
for i, v in c {
|
||||||
mut accumulated := []byte{}
|
if v == `\\` {
|
||||||
val := c[2..c.len].to_lower() // 0A -> 0a
|
escapes_pos << i
|
||||||
mut offset := 0
|
|
||||||
// take two characters at a time, parse as hex and add to bytes
|
|
||||||
for {
|
|
||||||
if offset >= val.len - 1 {
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
mut byteval := byte(0)
|
|
||||||
big := val[offset]
|
|
||||||
little := val[offset + 1]
|
|
||||||
if !big.is_hex_digit() {
|
|
||||||
accumulated.clear()
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
if !little.is_hex_digit() {
|
if escaped_hex {
|
||||||
accumulated.clear()
|
c = decode_h_escapes(c, 0, escapes_pos)
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
if big.is_digit() {
|
|
||||||
byteval |= (big - ascii_0) << 4
|
|
||||||
} else {
|
} else {
|
||||||
byteval |= (big - ascii_a + 10) << 4
|
c = decode_o_escapes(c, 0, escapes_pos)
|
||||||
}
|
|
||||||
if little.is_digit() {
|
|
||||||
byteval |= (little - ascii_0)
|
|
||||||
} else {
|
|
||||||
byteval |= (little - ascii_a + 10)
|
|
||||||
}
|
|
||||||
|
|
||||||
accumulated << byteval
|
|
||||||
offset += 2
|
|
||||||
}
|
|
||||||
if accumulated.len > 0 {
|
|
||||||
c = accumulated.bytestr()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// the string inside the backticks is longer than one character
|
|
||||||
// but we might only have one rune, say in the case
|
|
||||||
u := c.runes()
|
u := c.runes()
|
||||||
if u.len != 1 {
|
if u.len != 1 {
|
||||||
if escaped_hex || escaped_unicode {
|
if escaped_hex || escaped_unicode {
|
||||||
s.error('invalid character literal (escape sequence did not refer to a singular rune)')
|
s.error('invalid character literal `$orig` => `$c` ($u) (escape sequence did not refer to a singular rune)')
|
||||||
} else {
|
} else {
|
||||||
s.add_error_detail_with_pos('use quotes for strings, backticks for characters',
|
s.add_error_detail_with_pos('use quotes for strings, backticks for characters',
|
||||||
lspos)
|
lspos)
|
||||||
s.error('invalid character literal (more than one character)')
|
s.error('invalid character literal `$orig` => `$c` ($u) (more than one character)')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -150,13 +150,19 @@ fn test_ref_ref_array_ref_ref_foo() {
|
|||||||
assert result[6] == .name
|
assert result[6] == .name
|
||||||
}
|
}
|
||||||
|
|
||||||
fn test_escape_string() {
|
fn test_escape_rune() {
|
||||||
// these assertions aren't helpful...
|
// these lines work if the v compiler is working
|
||||||
// they test the vlib built-in to the compiler,
|
// will not work until v compiler on github is updated
|
||||||
// but we want to test this module before compilation
|
// assert `\x61` == `a`
|
||||||
assert '\x61' == 'a'
|
// assert `\u0061` == `a`
|
||||||
assert '\x62' == 'b'
|
|
||||||
// assert `\x61` == `a` // will work after pull request goes through
|
// will not work until PR is accepted
|
||||||
|
// assert `\141` == `a`
|
||||||
|
// assert `\xe2\x98\x85` == `★`
|
||||||
|
// assert `\342\230\205` == `★`
|
||||||
|
|
||||||
|
// the following lines test the scanner module
|
||||||
|
// even before it is compiled into the v executable
|
||||||
|
|
||||||
// SINGLE CHAR ESCAPES
|
// SINGLE CHAR ESCAPES
|
||||||
// SINGLE CHAR APOSTROPHE
|
// SINGLE CHAR APOSTROPHE
|
||||||
@ -187,14 +193,30 @@ fn test_escape_string() {
|
|||||||
// SINGLE CHAR INCORRECT ESCAPE
|
// SINGLE CHAR INCORRECT ESCAPE
|
||||||
// result = scan_tokens(r'`\x61\x61`') // should always result in an error
|
// result = scan_tokens(r'`\x61\x61`') // should always result in an error
|
||||||
|
|
||||||
// SINGLE CHAR MULTI-BYTE UTF-8
|
// SINGLE CHAR MULTI-BYTE UTF-8 (hex)
|
||||||
// Compilation blocked by vlib/v/checker/check_types.v, but works in the repl
|
result = scan_tokens(r'`\xe2\x98\x85`')
|
||||||
result = scan_tokens(r'`\xe29885`')
|
|
||||||
assert result[0].lit == r'★'
|
assert result[0].lit == r'★'
|
||||||
|
|
||||||
|
// SINGLE CHAR MULTI-BYTE UTF-8 (octal)
|
||||||
|
result = scan_tokens(r'`\342\230\205`')
|
||||||
|
assert result[0].lit == r'★'
|
||||||
|
}
|
||||||
|
|
||||||
|
fn test_escape_string() {
|
||||||
|
// these lines work if the v compiler is working
|
||||||
|
assert '\x61' == 'a'
|
||||||
|
assert '\x62' == 'b'
|
||||||
|
assert '\u0061' == 'a'
|
||||||
|
assert '\141' == 'a'
|
||||||
|
assert '\xe2\x98\x85' == '★'
|
||||||
|
assert '\342\230\205' == '★'
|
||||||
|
|
||||||
|
// the following lines test the scanner module
|
||||||
|
// even before it is compiled into the v executable
|
||||||
|
|
||||||
// STRING ESCAPES =================
|
// STRING ESCAPES =================
|
||||||
// STRING APOSTROPHE
|
// STRING APOSTROPHE
|
||||||
result = scan_tokens(r"'\''")
|
mut result := scan_tokens(r"'\''")
|
||||||
assert result[0].kind == .string
|
assert result[0].kind == .string
|
||||||
assert result[0].lit == r"\'"
|
assert result[0].lit == r"\'"
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user