diff --git a/vlib/encoding/utf8/utf8_util.v b/vlib/encoding/utf8/utf8_util.v index c8806a6b87..704c9d43e2 100644 --- a/vlib/encoding/utf8/utf8_util.v +++ b/vlib/encoding/utf8/utf8_util.v @@ -23,7 +23,7 @@ pub fn len(s string) int { mut index := 0 for { - ch_len := utf8util_char_len(s[index]) + ch_len := char_len(s[index]) index += ch_len count++ if index >= s.len { @@ -38,12 +38,17 @@ pub fn u_len(s ustring) int { return len(s.s) } +// char_len calculate the length in bytes of a utf8 char +pub fn char_len(b byte) int { + return ((0xe5000000 >> ((b >> 3) & 0x1e)) & 3) + 1 +} + // get_uchar convert a unicode glyph in string[index] into a int unicode char pub fn get_uchar(s string, index int) int { mut res := 0 mut ch_len := 0 - if s.len > 0 { - ch_len = utf8util_char_len(s[index]) + if s.len > 0 { + ch_len = char_len(s[index]) if ch_len == 1 { return u16(s[index]) @@ -153,10 +158,6 @@ pub fn is_uchar_global_punct( uchar int ) bool { Private functions */ -// utf8util_char_len calculate the length in bytes of a utf8 char -fn utf8util_char_len(b byte) int { - return (( 0xe5000000 >> (( b >> 3 ) & 0x1e )) & 3 ) + 1 -} // // if upper_flag == true then make low ==> upper conversion @@ -168,7 +169,7 @@ fn up_low(s string, upper_flag bool) string { mut str_res := unsafe {malloc(s.len + 1)} for { - ch_len := utf8util_char_len(s[index]) + ch_len := char_len(s[index]) if ch_len == 1 { if upper_flag==true { diff --git a/vlib/v/checker/tests/invalid_char_err.out b/vlib/v/checker/tests/invalid_char_err.out new file mode 100644 index 0000000000..e4a2b5c987 --- /dev/null +++ b/vlib/v/checker/tests/invalid_char_err.out @@ -0,0 +1,3 @@ +vlib/v/checker/tests/invalid_char_err.vv:1:1: error: invalid character `🐈` + 1 | 🐈println('') + | ^ \ No newline at end of file diff --git a/vlib/v/checker/tests/invalid_char_err.vv b/vlib/v/checker/tests/invalid_char_err.vv new file mode 100644 index 0000000000..0a27589655 --- /dev/null +++ b/vlib/v/checker/tests/invalid_char_err.vv @@ -0,0 +1 @@ +🐈println('') diff --git a/vlib/v/scanner/scanner.v b/vlib/v/scanner/scanner.v index 8918d86aba..b7e4d5dcee 100644 --- a/vlib/v/scanner/scanner.v +++ b/vlib/v/scanner/scanner.v @@ -4,6 +4,7 @@ module scanner import os +import encoding.utf8 import v.token import v.pref import v.util @@ -1002,12 +1003,19 @@ fn (mut s Scanner) text_scan() token.Token { return s.end_of_file() } } - s.error('invalid character `$c.ascii_str()`') + s.invalid_character() break } return s.end_of_file() } +fn (mut s Scanner) invalid_character() { + len := utf8.char_len(s.text[s.pos]) + end := util.imin(s.pos + len, s.text.len) + c := s.text[s.pos..end] + s.error('invalid character `$c`') +} + fn (s &Scanner) current_column() int { return s.pos - s.last_nl_pos }