mirror of
https://github.com/vlang/v.git
synced 2023-08-10 21:13:21 +03:00
builtin: correct error underline for unicode wide chars (#9010)
This commit is contained in:
parent
e937d6249c
commit
ce115dcbe0
@ -22,26 +22,28 @@ pub fn utf32_to_str_no_malloc(code u32, buf voidptr) string {
|
|||||||
mut res := ''
|
mut res := ''
|
||||||
unsafe {
|
unsafe {
|
||||||
mut buffer := byteptr(buf)
|
mut buffer := byteptr(buf)
|
||||||
if icode <= 127 { /* 0x7F */
|
if icode <= 127 {
|
||||||
|
// 0x7F
|
||||||
buffer[0] = byte(icode)
|
buffer[0] = byte(icode)
|
||||||
res = tos(buffer, 1)
|
res = tos(buffer, 1)
|
||||||
}
|
} else if icode <= 2047 {
|
||||||
else if icode <= 2047 { /* 0x7FF */
|
// 0x7FF
|
||||||
buffer[0] = 192 | byte(icode>>6) /* 0xC0 - 110xxxxx */
|
buffer[0] = 192 | byte(icode >> 6) // 0xC0 - 110xxxxx
|
||||||
buffer[1] = 128 | byte(icode & 63) /* 0x80 - 0x3F - 10xxxxxx */
|
buffer[1] = 128 | byte(icode & 63) // 0x80 - 0x3F - 10xxxxxx
|
||||||
res = tos(buffer, 2)
|
res = tos(buffer, 2)
|
||||||
}
|
} else if icode <= 65535 {
|
||||||
else if icode <= 65535 { /* 0xFFFF */
|
// 0xFFFF
|
||||||
buffer[0] = 224 | byte(icode>>12)/* 0xE0 - 1110xxxx */
|
buffer[0] = 224 | byte(icode >> 12) // 0xE0 - 1110xxxx
|
||||||
buffer[1] = 128 | (byte(icode>>6) & 63) /* 0x80 - 0x3F - 10xxxxxx */
|
buffer[1] = 128 | (byte(icode >> 6) & 63) // 0x80 - 0x3F - 10xxxxxx
|
||||||
buffer[2] = 128 | byte(icode & 63) /* 0x80 - 0x3F - 10xxxxxx */
|
buffer[2] = 128 | byte(icode & 63) // 0x80 - 0x3F - 10xxxxxx
|
||||||
res = tos(buffer, 3)
|
res = tos(buffer, 3)
|
||||||
}
|
}
|
||||||
else if icode <= 1114111/* 0x10FFFF */ {
|
// 0x10FFFF
|
||||||
buffer[0] = 240 | byte(icode>>18) /* 0xF0 - 11110xxx */
|
else if icode <= 1114111 {
|
||||||
buffer[1] = 128 | (byte(icode>>12) & 63) /* 0x80 - 0x3F - 10xxxxxx */
|
buffer[0] = 240 | byte(icode >> 18) // 0xF0 - 11110xxx
|
||||||
buffer[2] = 128 | (byte(icode>>6) & 63) /* 0x80 - 0x3F - 10xxxxxx */
|
buffer[1] = 128 | (byte(icode >> 12) & 63) // 0x80 - 0x3F - 10xxxxxx
|
||||||
buffer[3] = 128 | byte(icode & 63) /* 0x80 - 0x3F - 10xxxxxx */
|
buffer[2] = 128 | (byte(icode >> 6) & 63) // 0x80 - 0x3F - 10xxxxxx
|
||||||
|
buffer[3] = 128 | byte(icode & 63) // 0x80 - 0x3F - 10xxxxxx
|
||||||
res = tos(buffer, 4)
|
res = tos(buffer, 4)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -80,15 +82,13 @@ fn utf8_len(c byte) int {
|
|||||||
if (x & 240) != 0 {
|
if (x & 240) != 0 {
|
||||||
// 0xF0
|
// 0xF0
|
||||||
x >>= 4
|
x >>= 4
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
b += 4
|
b += 4
|
||||||
}
|
}
|
||||||
if (x & 12) != 0 {
|
if (x & 12) != 0 {
|
||||||
// 0x0C
|
// 0x0C
|
||||||
x >>= 2
|
x >>= 2
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
b += 2
|
b += 2
|
||||||
}
|
}
|
||||||
if (x & 2) == 0 {
|
if (x & 2) == 0 {
|
||||||
@ -114,7 +114,9 @@ fn utf8_str_len(s string) int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Calculate string length for formatting, i.e. number of "characters"
|
// Calculate string length for formatting, i.e. number of "characters"
|
||||||
fn utf8_str_visible_length(s string) int {
|
// This is a simplified implementation. If you need specification-compliant width,
|
||||||
|
// use utf8.east_asian.display_width.
|
||||||
|
pub fn utf8_str_visible_length(s string) int {
|
||||||
mut l := 0
|
mut l := 0
|
||||||
mut ul := 1
|
mut ul := 1
|
||||||
for i := 0; i < s.len; i += ul {
|
for i := 0; i < s.len; i += ul {
|
||||||
@ -129,24 +131,58 @@ fn utf8_str_visible_length(s string) int {
|
|||||||
return l
|
return l
|
||||||
}
|
}
|
||||||
l++
|
l++
|
||||||
// recognize combining characters
|
// recognize combining characters and wide characters
|
||||||
if c == 0xcc || c == 0xcd {
|
match ul {
|
||||||
r := (u16(c) << 8) | unsafe {s.str[i+1]}
|
2 {
|
||||||
if r >= 0xcc80 && r < 0xcdb0 { // diacritical marks
|
r := u64((u16(c) << 8) | unsafe { s.str[i + 1] })
|
||||||
|
if r >= 0xcc80 && r < 0xcdb0 {
|
||||||
|
// diacritical marks
|
||||||
l--
|
l--
|
||||||
}
|
}
|
||||||
} else if c == 0xe1 || c == 0xe2 || c == 0xef {
|
}
|
||||||
r := (u32(c) << 16) | unsafe {(u32(s.str[i+1]) << 8) | s.str[i+2]}
|
3 {
|
||||||
// diacritical marks extended 0xe1aab0 - 0xe1ac80
|
r := u64((u32(c) << 16) | unsafe { (u32(s.str[i + 1]) << 8) | s.str[i + 2] })
|
||||||
// diacritical marks supplement 0xe1b780 - 0xe1b880
|
// diacritical marks extended
|
||||||
// diacritical marks for symbols 0xe28390 - 0xe28480
|
// diacritical marks supplement
|
||||||
// half marks 0xefb8a0 - 0xefb8b0
|
// diacritical marks for symbols
|
||||||
if (r >= 0xe1aab0 && r < 0xe1ac80)
|
if (r >= 0xe1aab0 && r <= 0xe1ac7f)
|
||||||
|| (r >= 0xe1b780 && r < 0xe1b880)
|
|| (r >= 0xe1b780 && r <= 0xe1b87f)
|
||||||
|| (r >= 0xe28390 && r < 0xe28480)
|
|| (r >= 0xe28390 && r <= 0xe2847f)
|
||||||
|| (r >= 0xefb8a0 && r < 0xefb8b0) {
|
|| (r >= 0xefb8a0 && r <= 0xefb8af) {
|
||||||
|
// diacritical marks
|
||||||
l--
|
l--
|
||||||
}
|
}
|
||||||
|
// Hangul
|
||||||
|
// CJK Unified Ideographs
|
||||||
|
// Hangul
|
||||||
|
// CJK
|
||||||
|
else if (r >= 0xe18480 && r <= 0xe1859f)
|
||||||
|
|| (r >= 0xe2ba80 && r <= 0xe2bf95)
|
||||||
|
|| (r >= 0xe38080 && r <= 0xe4b77f)
|
||||||
|
|| (r >= 0xe4b880 && r <= 0xea807f)
|
||||||
|
|| (r >= 0xeaa5a0 && r <= 0xeaa79f)
|
||||||
|
|| (r >= 0xeab080 && r <= 0xed9eaf)
|
||||||
|
|| (r >= 0xefa480 && r <= 0xefac7f)
|
||||||
|
|| (r >= 0xefb8b8 && r <= 0xefb9af) {
|
||||||
|
// half marks
|
||||||
|
l++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
4 {
|
||||||
|
r := u64((u32(c) << 24) | unsafe {
|
||||||
|
(u32(s.str[i + 1]) << 16) | (u32(s.str[i + 2]) << 8) | s.str[i + 3]
|
||||||
|
})
|
||||||
|
// Enclosed Ideographic Supplement
|
||||||
|
// Emoji
|
||||||
|
// CJK Unified Ideographs Extension B-G
|
||||||
|
if (r >= 0x0f9f8880 && r <= 0xf09f8a8f)
|
||||||
|
|| (r >= 0xf09f8c80 && r <= 0xf09f9c90)
|
||||||
|
|| (r >= 0xf09fa490 && r <= 0xf09fa7af)
|
||||||
|
|| (r >= 0xff0a08080 && r <= 0xf180807f) {
|
||||||
|
l++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return l
|
return l
|
||||||
|
@ -4407,15 +4407,15 @@ fn (mut c Checker) match_exprs(mut node ast.MatchExpr, cond_type_sym table.TypeS
|
|||||||
for expr in branch.exprs {
|
for expr in branch.exprs {
|
||||||
mut key := ''
|
mut key := ''
|
||||||
if expr is ast.RangeExpr {
|
if expr is ast.RangeExpr {
|
||||||
mut low := 0
|
mut low := i64(0)
|
||||||
mut high := 0
|
mut high := i64(0)
|
||||||
c.expected_type = node.expected_type
|
c.expected_type = node.expected_type
|
||||||
low_expr := expr.low
|
low_expr := expr.low
|
||||||
high_expr := expr.high
|
high_expr := expr.high
|
||||||
if low_expr is ast.IntegerLiteral {
|
if low_expr is ast.IntegerLiteral {
|
||||||
if high_expr is ast.IntegerLiteral {
|
if high_expr is ast.IntegerLiteral {
|
||||||
low = low_expr.val.int()
|
low = low_expr.val.i64()
|
||||||
high = high_expr.val.int()
|
high = high_expr.val.i64()
|
||||||
} else {
|
} else {
|
||||||
c.error('mismatched range types', low_expr.pos)
|
c.error('mismatched range types', low_expr.pos)
|
||||||
}
|
}
|
||||||
@ -4430,6 +4430,11 @@ fn (mut c Checker) match_exprs(mut node ast.MatchExpr, cond_type_sym table.TypeS
|
|||||||
typ := c.table.type_to_str(c.expr(expr.low))
|
typ := c.table.type_to_str(c.expr(expr.low))
|
||||||
c.error('cannot use type `$typ` in match range', branch.pos)
|
c.error('cannot use type `$typ` in match range', branch.pos)
|
||||||
}
|
}
|
||||||
|
high_low_cutoff := 1000
|
||||||
|
if high - low > high_low_cutoff {
|
||||||
|
c.warn('more than $high_low_cutoff possibilities ($low ... $high) in match range',
|
||||||
|
branch.pos)
|
||||||
|
}
|
||||||
for i in low .. high + 1 {
|
for i in low .. high + 1 {
|
||||||
key = i.str()
|
key = i.str()
|
||||||
val := if key in branch_exprs { branch_exprs[key] } else { 0 }
|
val := if key in branch_exprs { branch_exprs[key] } else { 0 }
|
||||||
|
55
vlib/v/checker/tests/error_with_unicode.out
Normal file
55
vlib/v/checker/tests/error_with_unicode.out
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
vlib/v/checker/tests/error_with_unicode.vv:5:17: error: cannot use `int literal` as `string` in argument 2 to `f1`
|
||||||
|
3 |
|
||||||
|
4 | fn main() {
|
||||||
|
5 | f1('🐀🐈', 0)
|
||||||
|
| ^
|
||||||
|
6 | f2(0, '🐟🐧')
|
||||||
|
7 | mut n := 0
|
||||||
|
vlib/v/checker/tests/error_with_unicode.vv:6:8: error: cannot use `string` as `int` in argument 2 to `f2`
|
||||||
|
4 | fn main() {
|
||||||
|
5 | f1('🐀🐈', 0)
|
||||||
|
6 | f2(0, '🐟🐧')
|
||||||
|
| ~~~~~~
|
||||||
|
7 | mut n := 0
|
||||||
|
8 | n = '漢字'
|
||||||
|
vlib/v/checker/tests/error_with_unicode.vv:8:6: error: cannot assign to `n`: expected `int`, not `string`
|
||||||
|
6 | f2(0, '🐟🐧')
|
||||||
|
7 | mut n := 0
|
||||||
|
8 | n = '漢字'
|
||||||
|
| ~~~~~~
|
||||||
|
9 | n = 'ひらがな'
|
||||||
|
10 | n = '简体字'
|
||||||
|
vlib/v/checker/tests/error_with_unicode.vv:9:6: error: cannot assign to `n`: expected `int`, not `string`
|
||||||
|
7 | mut n := 0
|
||||||
|
8 | n = '漢字'
|
||||||
|
9 | n = 'ひらがな'
|
||||||
|
| ~~~~~~~~~~
|
||||||
|
10 | n = '简体字'
|
||||||
|
11 | n = '繁體字'
|
||||||
|
vlib/v/checker/tests/error_with_unicode.vv:10:6: error: cannot assign to `n`: expected `int`, not `string`
|
||||||
|
8 | n = '漢字'
|
||||||
|
9 | n = 'ひらがな'
|
||||||
|
10 | n = '简体字'
|
||||||
|
| ~~~~~~~~
|
||||||
|
11 | n = '繁體字'
|
||||||
|
12 | n = '한글'
|
||||||
|
vlib/v/checker/tests/error_with_unicode.vv:11:6: error: cannot assign to `n`: expected `int`, not `string`
|
||||||
|
9 | n = 'ひらがな'
|
||||||
|
10 | n = '简体字'
|
||||||
|
11 | n = '繁體字'
|
||||||
|
| ~~~~~~~~
|
||||||
|
12 | n = '한글'
|
||||||
|
13 | n = 'Кириллица'
|
||||||
|
vlib/v/checker/tests/error_with_unicode.vv:12:6: error: cannot assign to `n`: expected `int`, not `string`
|
||||||
|
10 | n = '简体字'
|
||||||
|
11 | n = '繁體字'
|
||||||
|
12 | n = '한글'
|
||||||
|
| ~~~~~~
|
||||||
|
13 | n = 'Кириллица'
|
||||||
|
14 | }
|
||||||
|
vlib/v/checker/tests/error_with_unicode.vv:13:6: error: cannot assign to `n`: expected `int`, not `string`
|
||||||
|
11 | n = '繁體字'
|
||||||
|
12 | n = '한글'
|
||||||
|
13 | n = 'Кириллица'
|
||||||
|
| ~~~~~~~~~~~
|
||||||
|
14 | }
|
14
vlib/v/checker/tests/error_with_unicode.vv
Normal file
14
vlib/v/checker/tests/error_with_unicode.vv
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
fn f1(_ string, _ string) {}
|
||||||
|
fn f2(_ int, _ int) {}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
f1('🐀🐈', 0)
|
||||||
|
f2(0, '🐟🐧')
|
||||||
|
mut n := 0
|
||||||
|
n = '漢字'
|
||||||
|
n = 'ひらがな'
|
||||||
|
n = '简体字'
|
||||||
|
n = '繁體字'
|
||||||
|
n = '한글'
|
||||||
|
n = 'Кириллица'
|
||||||
|
}
|
@ -4,6 +4,7 @@
|
|||||||
module util
|
module util
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import strings
|
||||||
import term
|
import term
|
||||||
import v.token
|
import v.token
|
||||||
|
|
||||||
@ -132,14 +133,22 @@ pub fn source_context(kind string, source string, column int, pos token.Position
|
|||||||
// line, so that it prints the ^ character exactly on the *same spot*
|
// line, so that it prints the ^ character exactly on the *same spot*
|
||||||
// where it is needed. That is the reason we can not just
|
// where it is needed. That is the reason we can not just
|
||||||
// use strings.repeat(` `, col) to form it.
|
// use strings.repeat(` `, col) to form it.
|
||||||
mut pointerline := ''
|
mut pointerline_builder := strings.new_builder(sline.len)
|
||||||
for bchar in sline[..start_column] {
|
for i := 0; i < start_column; {
|
||||||
x := if bchar.is_space() { bchar } else { ` ` }
|
if sline[i].is_space() {
|
||||||
pointerline += x.ascii_str()
|
pointerline_builder.write_b(sline[i])
|
||||||
|
i++
|
||||||
|
} else {
|
||||||
|
char_len := utf8_char_len(sline[i])
|
||||||
|
spaces := ' '.repeat(utf8_str_visible_length(sline[i..i + char_len]))
|
||||||
|
pointerline_builder.write_string(spaces)
|
||||||
|
i += char_len
|
||||||
}
|
}
|
||||||
underline := if pos.len > 1 { '~'.repeat(end_column - start_column) } else { '^' }
|
}
|
||||||
pointerline += bold(color(kind, underline))
|
underline_len := utf8_str_visible_length(sline[start_column..end_column])
|
||||||
clines << ' | ' + pointerline.replace('\t', tab_spaces)
|
underline := if underline_len > 1 { '~'.repeat(underline_len) } else { '^' }
|
||||||
|
pointerline_builder.write_string(bold(color(kind, underline)))
|
||||||
|
clines << ' | ' + pointerline_builder.str().replace('\t', tab_spaces)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return clines
|
return clines
|
||||||
|
Loading…
Reference in New Issue
Block a user