1
0
mirror of https://github.com/vlang/v.git synced 2023-08-10 21:13:21 +03:00

scanner: print multibyte char for invalid char error (#8804)

This commit is contained in:
zakuro 2021-02-18 22:43:39 +09:00 committed by GitHub
parent 0142d58aa6
commit 1891ebf22d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 22 additions and 9 deletions

View File

@ -23,7 +23,7 @@ pub fn len(s string) int {
mut index := 0
for {
ch_len := utf8util_char_len(s[index])
ch_len := char_len(s[index])
index += ch_len
count++
if index >= s.len {
@ -38,12 +38,17 @@ pub fn u_len(s ustring) int {
return len(s.s)
}
// char_len calculates the length in bytes of a utf8 char, given its first byte `b`.
// The constant 0xe5000000 is used as a packed lookup table of 2-bit fields:
// `(b >> 3) & 0x1e` turns the top four bits of `b` into an even shift amount,
// the selected 2-bit field is extracted with `& 3`, and 1 is added, so:
//   0x00..0xbf -> 1 (this includes the continuation bytes 0x80..0xbf)
//   0xc0..0xdf -> 2
//   0xe0..0xef -> 3
//   0xf0..0xff -> 4
// Only `b` is inspected; neither it nor any following bytes are validated.
pub fn char_len(b byte) int {
return ((0xe5000000 >> ((b >> 3) & 0x1e)) & 3) + 1
}
// get_uchar converts a unicode glyph in string[index] into an int unicode char
pub fn get_uchar(s string, index int) int {
mut res := 0
mut ch_len := 0
if s.len > 0 {
ch_len = utf8util_char_len(s[index])
if s.len > 0 {
ch_len = char_len(s[index])
if ch_len == 1 {
return u16(s[index])
@ -153,10 +158,6 @@ pub fn is_uchar_global_punct( uchar int ) bool {
Private functions
*/
// utf8util_char_len calculates the length in bytes of a utf8 char, given its first byte `b`.
// The constant 0xe5000000 is used as a packed lookup table of 2-bit fields:
// `(b >> 3) & 0x1e` turns the top four bits of `b` into an even shift amount,
// the selected 2-bit field is extracted with `& 3`, and 1 is added, so:
//   0x00..0xbf -> 1 (this includes the continuation bytes 0x80..0xbf)
//   0xc0..0xdf -> 2
//   0xe0..0xef -> 3
//   0xf0..0xff -> 4
// Only `b` is inspected; neither it nor any following bytes are validated.
fn utf8util_char_len(b byte) int {
return (( 0xe5000000 >> (( b >> 3 ) & 0x1e )) & 3 ) + 1
}
//
// if upper_flag == true then make low ==> upper conversion
@ -168,7 +169,7 @@ fn up_low(s string, upper_flag bool) string {
mut str_res := unsafe {malloc(s.len + 1)}
for {
ch_len := utf8util_char_len(s[index])
ch_len := char_len(s[index])
if ch_len == 1 {
if upper_flag==true {

View File

@ -0,0 +1,3 @@
vlib/v/checker/tests/invalid_char_err.vv:1:1: error: invalid character `🐈`
1 | 🐈println('')
| ^

View File

@ -0,0 +1 @@
🐈println('')

View File

@ -4,6 +4,7 @@
module scanner
import os
import encoding.utf8
import v.token
import v.pref
import v.util
@ -1002,12 +1003,19 @@ fn (mut s Scanner) text_scan() token.Token {
return s.end_of_file()
}
}
s.error('invalid character `$c.ascii_str()`')
s.invalid_character()
break
}
return s.end_of_file()
}
// invalid_character reports the character at the current scan position as invalid.
// The byte length of the character is taken from `utf8.char_len` applied to the
// byte at `s.pos`, so a multibyte utf8 sequence is put into the error message
// as a whole instead of only its first byte.
fn (mut s Scanner) invalid_character() {
	start := s.pos
	// number of bytes the sequence should occupy, judged by its leading byte
	width := utf8.char_len(s.text[start])
	// clamp the slice end so that it never runs past the end of the text
	stop := util.imin(start + width, s.text.len)
	c := s.text[start..stop]
	s.error('invalid character `$c`')
}
// current_column returns the difference between the current scan position
// (`s.pos`) and the position stored in `s.last_nl_pos`.
fn (s &Scanner) current_column() int {
	column := s.pos - s.last_nl_pos
	return column
}