mirror of
https://github.com/vlang/v.git
synced 2023-08-10 21:13:21 +03:00
encoding.utf8: add is_space (#15847)
This commit is contained in:
parent
6ec931c781
commit
ea8b30fd91
@ -1132,6 +1132,20 @@ const letter_table = RangeTable{
|
|||||||
latin_offset: 6
|
latin_offset: 6
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// white_space_table is the range table passed to `is_excluding_latin` by
// `is_space` for runes above `max_latin_1`.
// NOTE(review): `Range16` is declared outside this view; the three positional
// values are presumably {lo, hi, stride} - e.g. 0x0020 + 101 == 0x0085 and
// 0x00a0 + 5600 == 0x1680 - confirm against the struct definition.
// NOTE(review): `latin_offset: 2` presumably counts the leading r16 entries
// that lie entirely within Latin-1 (the first two here) - confirm.
const white_space_table = RangeTable{
|
||||||
|
r16: [
|
||||||
|
Range16{0x0009, 0x000d, 1},
|
||||||
|
Range16{0x0020, 0x0085, 101},
|
||||||
|
Range16{0x00a0, 0x1680, 5600},
|
||||||
|
Range16{0x2000, 0x200a, 1},
|
||||||
|
Range16{0x2028, 0x2029, 1},
|
||||||
|
Range16{0x202f, 0x205f, 48},
|
||||||
|
Range16{0x3000, 0x3000, 1},
|
||||||
|
]
|
||||||
|
r32: []
|
||||||
|
latin_offset: 2
|
||||||
|
}
|
||||||
|
|
||||||
struct RangeTable {
|
struct RangeTable {
|
||||||
pub:
|
pub:
|
||||||
r16 []Range16
|
r16 []Range16
|
||||||
|
@ -160,6 +160,24 @@ pub fn is_letter(r rune) bool {
|
|||||||
return is_excluding_latin(letter_table, r)
|
return is_excluding_latin(letter_table, r)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// is_space reports whether the rune `r` is a white space character.
// For runes up to `max_latin_1` only the following count as white space:
// ```
// `\t`, `\n`, `\v`, `\f`, `\r`, ` `, 0x85 (NEL), 0xA0 (NBSP)
// ```
// Any rune above `max_latin_1` is looked up in `white_space_table`.
pub fn is_space(r rune) bool {
	// Runes beyond Latin-1 are resolved through the range table.
	if r > max_latin_1 {
		return is_excluding_latin(white_space_table, r)
	}
	// Inside Latin-1 the answer is a fixed, explicitly listed set.
	return match r {
		`\t`, `\n`, `\v`, `\f`, `\r`, ` `, 0x85, 0xA0 { true }
		else { false }
	}
}
|
||||||
|
|
||||||
// is_uchar_punct returns true if the input unicode is a western unicode punctuation
|
// is_uchar_punct returns true if the input unicode is a western unicode punctuation
|
||||||
pub fn is_uchar_punct(uchar int) bool {
|
pub fn is_uchar_punct(uchar int) bool {
|
||||||
return find_punct_in_table(uchar, utf8.unicode_punct_western) != 0
|
return find_punct_in_table(uchar, utf8.unicode_punct_western) != 0
|
||||||
|
@ -91,3 +91,17 @@ fn test_is_letter() {
|
|||||||
assert utf8.is_letter(`ȶ`) == true
|
assert utf8.is_letter(`ȶ`) == true
|
||||||
assert utf8.is_letter(`ȹ`) == true
|
assert utf8.is_letter(`ȹ`) == true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// test_is_space checks that ASCII letters are never reported as white space
// and that a few known white space code points are.
fn test_is_space() {
	// Lowercase ASCII letters. The upper bound of a V range is exclusive,
	// so `z` itself is not visited.
	for lower in `a` .. `z` {
		assert !utf8.is_space(lower)
	}
	// Uppercase ASCII letters, again up to but not including `Z`.
	for upper in `A` .. `Z` {
		assert !utf8.is_space(upper)
	}
	// U+202F NARROW NO-BREAK SPACE, U+2009 THIN SPACE, U+00A0 NO-BREAK SPACE.
	assert utf8.is_space(`\u202f`)
	assert utf8.is_space(`\u2009`)
	assert utf8.is_space(`\u00A0`)
}
|
||||||
|
Loading…
Reference in New Issue
Block a user