1
0
mirror of https://github.com/vlang/v.git synced 2023-08-10 21:13:21 +03:00

encoding.utf8: add is_space (#15847)

This commit is contained in:
ChAoS_UnItY 2022-09-23 15:34:45 +08:00 committed by GitHub
parent 6ec931c781
commit ea8b30fd91
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 46 additions and 0 deletions

View File

@ -1132,6 +1132,20 @@ const letter_table = RangeTable{
latin_offset: 6
}
// white_space_table is the range table that is_space hands to
// is_excluding_latin for runes above max_latin_1.
// NOTE(review): Range16 is declared outside this view; the entries read as
// {lo, hi, stride} triples, as in Go's unicode range tables - confirm. Under
// that reading `{0x0020, 0x0085, 101}` denotes only 0x0020 and 0x0085,
// `{0x00a0, 0x1680, 5600}` only 0x00a0 and 0x1680, and
// `{0x202f, 0x205f, 48}` only 0x202f and 0x205f.
// NOTE(review): latin_offset presumably counts the leading r16 entries that
// lie entirely at or below 0xff (the first two here) - confirm against
// is_excluding_latin before editing the table.
const white_space_table = RangeTable{
r16: [
Range16{0x0009, 0x000d, 1},
Range16{0x0020, 0x0085, 101},
Range16{0x00a0, 0x1680, 5600},
Range16{0x2000, 0x200a, 1},
Range16{0x2028, 0x2029, 1},
Range16{0x202f, 0x205f, 48},
Range16{0x3000, 0x3000, 1},
]
r32: []
latin_offset: 2
}
struct RangeTable {
pub:
r16 []Range16

View File

@ -160,6 +160,24 @@ pub fn is_letter(r rune) bool {
return is_excluding_latin(letter_table, r)
}
// is_space reports whether `r` is a white space character.
// Runes up to `max_latin_1` are decided by a fixed list:
// ```
// `\t`, `\n`, `\v`, `\f`, `\r`, ` `, 0x85 (NEL), 0xA0 (NBSP)
// ```
// Every other rune is looked up in `white_space_table`.
pub fn is_space(r rune) bool {
	// Above max_latin_1: defer to the range table.
	if r > max_latin_1 {
		return is_excluding_latin(white_space_table, r)
	}
	// At or below max_latin_1: only the listed code points count as white space.
	return match r {
		`\t`, `\n`, `\v`, `\f`, `\r`, ` `, 0x85, 0xA0 { true }
		else { false }
	}
}
// is_uchar_punct returns true if the input unicode code point is a western unicode punctuation character
pub fn is_uchar_punct(uchar int) bool {
return find_punct_in_table(uchar, utf8.unicode_punct_western) != 0

View File

@ -91,3 +91,17 @@ fn test_is_letter() {
assert utf8.is_letter(`ȶ`) == true
assert utf8.is_letter(`ȹ`) == true
}
// test_is_space checks utf8.is_space on ASCII letters (never white space),
// on the explicitly listed Latin-1 white space characters, and on runes that
// are resolved through the white space range table.
fn test_is_space() {
	// `..` is half-open, so the upper bound of each range is asserted separately.
	for ra in `a` .. `z` {
		assert utf8.is_space(ra) == false
	}
	assert utf8.is_space(`z`) == false
	for ra in `A` .. `Z` {
		assert utf8.is_space(ra) == false
	}
	assert utf8.is_space(`Z`) == false
	// Latin-1 branch: characters from the explicit list.
	assert utf8.is_space(` `) == true
	assert utf8.is_space(`\t`) == true
	assert utf8.is_space(`\n`) == true
	assert utf8.is_space(`\r`) == true
	assert utf8.is_space(`\u00A0`) == true
	// Table branch: positives inside the listed ranges.
	assert utf8.is_space(`\u202f`) == true
	assert utf8.is_space(`\u2009`) == true
	assert utf8.is_space(`\u3000`) == true
	// Table branch: 0x200b sits just past the 0x2000..0x200a range.
	assert utf8.is_space(`\u200b`) == false
}