mirror of
https://github.com/vlang/v.git
synced 2023-08-10 21:13:21 +03:00
websocket utf8: move utf8 functions from websocket to encoding.utf8, add utf8_test.v (4/4) (#5924)
This commit is contained in:

committed by
GitHub

parent
38aa5d6930
commit
b0d76c59f7
90
vlib/encoding/utf8/utf8.v
Normal file
90
vlib/encoding/utf8/utf8.v
Normal file
@ -0,0 +1,90 @@
|
||||
module utf8
|
||||
|
||||
// Utf8State is the working state of the incremental, byte-at-a-time
// UTF-8 validator driven by `next_state`.
struct Utf8State {
mut:
	// index selects the sequence class being matched: 0 means "between
	// sequences", 1 is the 2-byte class, 2..5 are the 3-byte classes and
	// 6..8 are the 4-byte classes (see the checks in `next_state`).
	index    int
	// subindex counts the bytes already accepted in the current sequence.
	subindex int
	// failed is set as soon as a byte cannot be part of well-formed input.
	failed   bool
}
|
||||
|
||||
// validate_str reports whether `str` passes `validate`, by handing its
// byte pointer and its length to that function.
pub fn validate_str(str string) bool {
	bytes := str.str
	count := str.len
	return validate(bytes, count)
}
|
||||
|
||||
// validate reports whether the first `len` bytes at `data` form
// well-formed UTF-8.
//
// Every one of the `len` bytes is examined. A 0x00 byte is an ordinary
// one-byte character (U+0000) and does NOT end the scan, so malformed
// bytes that follow an embedded NUL are still rejected.
//
// Returns false as soon as a byte is rejected, and also when the input
// ends in the middle of a multi-byte sequence. A `len` of zero (or less)
// yields true.
pub fn validate(data byteptr, len int) bool {
	mut state := Utf8State{}
	for i := 0; i < len; i++ {
		state.next_state(data[i])
		if state.failed {
			return false
		}
	}
	// A non-zero subindex means the last sequence was left unfinished.
	return !state.failed && state.subindex <= 0
}
|
||||
|
||||
// seq feeds one byte to the sequence class currently selected by `s.index`.
//
// r0      - the byte is a valid lead byte for this class
// r1      - the byte is a valid second byte for this class
// is_tail - the byte is a generic continuation byte (0x80..0xBF)
//
// Returns true when the byte has been consumed by this class: either it
// was accepted (possibly completing the sequence and resetting the state),
// or it was rejected and `s.failed` is now set.
// Returns false only when no sequence is in progress and the byte is not a
// lead byte of this class; `s.index` is then advanced so that the caller
// can try the next class with the same byte.
fn (mut s Utf8State) seq(r0 bool, r1 bool, is_tail bool) bool {
	// True while the current position is covered by one of the r0 / r1 /
	// is_tail checks below; false once only the final byte is left.
	expects_more := s.subindex == 0 || (s.index > 1 && s.subindex == 1) || (s.index >= 6 && s.subindex == 2)
	if !expects_more {
		// Final byte of the sequence: it must be a continuation byte.
		if is_tail {
			s.index = 0
			s.subindex = 0
			s.failed = false
		} else {
			s.failed = true
		}
		return true
	}
	if (s.subindex == 0 && r0) || (s.subindex == 1 && r1) || (s.subindex == 2 && is_tail) {
		s.subindex++
		return true
	}
	if s.subindex > 0 {
		// A lead byte was already accepted for this class, so a byte that
		// fails the continuation check makes the sequence malformed. It
		// must not be retried as the lead byte of another class, otherwise
		// input such as "\xE0\xEF\xBF\xBF" would be accepted.
		s.failed = true
		return true
	}
	// Not a lead byte of this class: let the caller try the next one.
	s.index++
	s.subindex = 0
	return false
}
|
||||
|
||||
// next_state consumes one input byte `c` and updates the validator state.
//
// Between sequences (index 0) a byte in 0x00..0x7F is accepted right away.
// Any other byte is offered to the sequence classes in ascending order;
// `seq` either consumes it or bumps `s.index` so that the following check
// gets its turn. The order of the checks below therefore matters.
fn (mut s Utf8State) next_state(c byte) {
	// one-byte sequences
	if s.index == 0 {
		if c <= 0x7F {
			return
		}
		s.index++
		s.subindex = 0
	}
	is_tail := c >= 0x80 && c <= 0xBF
	// two-byte sequences: lead C2..DF
	if s.index == 1 && s.seq(c >= 0xC2 && c <= 0xDF, false, is_tail) {
		return
	}
	// three-byte sequences: lead E0, second byte A0..BF
	if s.index == 2 && s.seq(c == 0xE0, c >= 0xA0 && c <= 0xBF, is_tail) {
		return
	}
	// lead E1..EC, second byte 80..BF
	if s.index == 3 && s.seq(c >= 0xE1 && c <= 0xEC, c >= 0x80 && c <= 0xBF, is_tail) {
		return
	}
	// lead ED, second byte 80..9F
	if s.index == 4 && s.seq(c == 0xED, c >= 0x80 && c <= 0x9F, is_tail) {
		return
	}
	// lead EE..EF, second byte 80..BF
	if s.index == 5 && s.seq(c >= 0xEE && c <= 0xEF, c >= 0x80 && c <= 0xBF, is_tail) {
		return
	}
	// four-byte sequences: lead F0, second byte 90..BF
	if s.index == 6 && s.seq(c == 0xF0, c >= 0x90 && c <= 0xBF, is_tail) {
		return
	}
	// lead F1..F3, second byte 80..BF
	if s.index == 7 && s.seq(c >= 0xF1 && c <= 0xF3, c >= 0x80 && c <= 0xBF, is_tail) {
		return
	}
	// lead F4, second byte 80..8F
	if s.index == 8 && s.seq(c == 0xF4, c >= 0x80 && c <= 0x8F, is_tail) {
		return
	}
	// No class consumed the byte (for example 0xC0, 0xC1, 0xF5..0xFF or a
	// stray continuation byte): the input is not valid.
	s.failed = true
}
|
9
vlib/encoding/utf8/utf8_test.v
Normal file
9
vlib/encoding/utf8/utf8_test.v
Normal file
@ -0,0 +1,9 @@
|
||||
import encoding.utf8 { validate_str }
|
||||
|
||||
// test_validate_str checks validate_str against two accepted inputs and
// three rejected ones.
fn test_validate_str() {
	// accepted: the same text once as a literal, once as escaped bytes
	assert validate_str('añçá')
	assert validate_str('\x61\xC3\xB1\xC3\xA7\xC3\xA1')
	// rejected: bytes that never start a sequence, and a cut-off sequence
	assert !validate_str('\xC0\xC1')
	assert !validate_str('\xF5\xFF')
	assert !validate_str('\xE0\xEF')
}
|
Reference in New Issue
Block a user