1
0
mirror of https://github.com/vlang/v.git synced 2023-08-10 21:13:21 +03:00

encoding.utf8: add is_number (#15931)

This commit is contained in:
ChAoS_UnItY 2022-10-01 16:01:51 +08:00 committed by GitHub
parent 3d2588f101
commit fe597b7172
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 169 additions and 0 deletions

View File

@ -538,6 +538,7 @@ const (
max_latin_1 = rune(0x00ff) // '\u00FF' // `ÿ`
)
// Represents all Unicode code points in Unicode category L.
const letter_table = RangeTable{
r16: [
Range16{0x0041, 0x005a, 1},
@ -1132,6 +1133,7 @@ const letter_table = RangeTable{
latin_offset: 6
}
// Represents all Unicode code points in Unicode category Z with the white space property.
const white_space_table = RangeTable{
r16: [
Range16{0x0009, 0x000d, 1},
@ -1146,6 +1148,146 @@ const white_space_table = RangeTable{
latin_offset: 2
}
// number_table represents all Unicode code points in Unicode category N (numbers).
// `is_number` consults this table only for runes above `max_latin_1`; smaller
// runes are answered from the `props` lookup instead.
// Each entry holds three values; judging by the `stride` field of the range
// structs these are presumably {lo, hi, stride} - confirm against Range16/Range32.
const number_table = RangeTable{
// Ranges written as Range16; the largest bound in this list is 0xff19.
r16: [
Range16{0x0030, 0x0039, 1},
Range16{0x00b2, 0x00b3, 1},
Range16{0x00b9, 0x00bc, 3},
Range16{0x00bd, 0x00be, 1},
Range16{0x0660, 0x0669, 1},
Range16{0x06f0, 0x06f9, 1},
Range16{0x07c0, 0x07c9, 1},
Range16{0x0966, 0x096f, 1},
Range16{0x09e6, 0x09ef, 1},
Range16{0x09f4, 0x09f9, 1},
Range16{0x0a66, 0x0a6f, 1},
Range16{0x0ae6, 0x0aef, 1},
Range16{0x0b66, 0x0b6f, 1},
Range16{0x0b72, 0x0b77, 1},
Range16{0x0be6, 0x0bf2, 1},
Range16{0x0c66, 0x0c6f, 1},
Range16{0x0c78, 0x0c7e, 1},
Range16{0x0ce6, 0x0cef, 1},
Range16{0x0d58, 0x0d5e, 1},
Range16{0x0d66, 0x0d78, 1},
Range16{0x0de6, 0x0def, 1},
Range16{0x0e50, 0x0e59, 1},
Range16{0x0ed0, 0x0ed9, 1},
Range16{0x0f20, 0x0f33, 1},
Range16{0x1040, 0x1049, 1},
Range16{0x1090, 0x1099, 1},
Range16{0x1369, 0x137c, 1},
Range16{0x16ee, 0x16f0, 1},
Range16{0x17e0, 0x17e9, 1},
Range16{0x17f0, 0x17f9, 1},
Range16{0x1810, 0x1819, 1},
Range16{0x1946, 0x194f, 1},
Range16{0x19d0, 0x19da, 1},
Range16{0x1a80, 0x1a89, 1},
Range16{0x1a90, 0x1a99, 1},
Range16{0x1b50, 0x1b59, 1},
Range16{0x1bb0, 0x1bb9, 1},
Range16{0x1c40, 0x1c49, 1},
Range16{0x1c50, 0x1c59, 1},
Range16{0x2070, 0x2074, 4},
Range16{0x2075, 0x2079, 1},
Range16{0x2080, 0x2089, 1},
Range16{0x2150, 0x2182, 1},
Range16{0x2185, 0x2189, 1},
Range16{0x2460, 0x249b, 1},
Range16{0x24ea, 0x24ff, 1},
Range16{0x2776, 0x2793, 1},
Range16{0x2cfd, 0x3007, 778},
Range16{0x3021, 0x3029, 1},
Range16{0x3038, 0x303a, 1},
Range16{0x3192, 0x3195, 1},
Range16{0x3220, 0x3229, 1},
Range16{0x3248, 0x324f, 1},
Range16{0x3251, 0x325f, 1},
Range16{0x3280, 0x3289, 1},
Range16{0x32b1, 0x32bf, 1},
Range16{0xa620, 0xa629, 1},
Range16{0xa6e6, 0xa6ef, 1},
Range16{0xa830, 0xa835, 1},
Range16{0xa8d0, 0xa8d9, 1},
Range16{0xa900, 0xa909, 1},
Range16{0xa9d0, 0xa9d9, 1},
Range16{0xa9f0, 0xa9f9, 1},
Range16{0xaa50, 0xaa59, 1},
Range16{0xabf0, 0xabf9, 1},
Range16{0xff10, 0xff19, 1},
]
// Ranges written as Range32; every bound in this list is above 0xffff.
r32: [
Range32{0x10107, 0x10133, 1},
Range32{0x10140, 0x10178, 1},
Range32{0x1018a, 0x1018b, 1},
Range32{0x102e1, 0x102fb, 1},
Range32{0x10320, 0x10323, 1},
Range32{0x10341, 0x1034a, 9},
Range32{0x103d1, 0x103d5, 1},
Range32{0x104a0, 0x104a9, 1},
Range32{0x10858, 0x1085f, 1},
Range32{0x10879, 0x1087f, 1},
Range32{0x108a7, 0x108af, 1},
Range32{0x108fb, 0x108ff, 1},
Range32{0x10916, 0x1091b, 1},
Range32{0x109bc, 0x109bd, 1},
Range32{0x109c0, 0x109cf, 1},
Range32{0x109d2, 0x109ff, 1},
Range32{0x10a40, 0x10a48, 1},
Range32{0x10a7d, 0x10a7e, 1},
Range32{0x10a9d, 0x10a9f, 1},
Range32{0x10aeb, 0x10aef, 1},
Range32{0x10b58, 0x10b5f, 1},
Range32{0x10b78, 0x10b7f, 1},
Range32{0x10ba9, 0x10baf, 1},
Range32{0x10cfa, 0x10cff, 1},
Range32{0x10d30, 0x10d39, 1},
Range32{0x10e60, 0x10e7e, 1},
Range32{0x10f1d, 0x10f26, 1},
Range32{0x10f51, 0x10f54, 1},
Range32{0x10fc5, 0x10fcb, 1},
Range32{0x11052, 0x1106f, 1},
Range32{0x110f0, 0x110f9, 1},
Range32{0x11136, 0x1113f, 1},
Range32{0x111d0, 0x111d9, 1},
Range32{0x111e1, 0x111f4, 1},
Range32{0x112f0, 0x112f9, 1},
Range32{0x11450, 0x11459, 1},
Range32{0x114d0, 0x114d9, 1},
Range32{0x11650, 0x11659, 1},
Range32{0x116c0, 0x116c9, 1},
Range32{0x11730, 0x1173b, 1},
Range32{0x118e0, 0x118f2, 1},
Range32{0x11950, 0x11959, 1},
Range32{0x11c50, 0x11c6c, 1},
Range32{0x11d50, 0x11d59, 1},
Range32{0x11da0, 0x11da9, 1},
Range32{0x11fc0, 0x11fd4, 1},
Range32{0x12400, 0x1246e, 1},
Range32{0x16a60, 0x16a69, 1},
Range32{0x16b50, 0x16b59, 1},
Range32{0x16b5b, 0x16b61, 1},
Range32{0x16e80, 0x16e96, 1},
Range32{0x1d2e0, 0x1d2f3, 1},
Range32{0x1d360, 0x1d378, 1},
Range32{0x1d7ce, 0x1d7ff, 1},
Range32{0x1e140, 0x1e149, 1},
Range32{0x1e2f0, 0x1e2f9, 1},
Range32{0x1e8c7, 0x1e8cf, 1},
Range32{0x1e950, 0x1e959, 1},
Range32{0x1ec71, 0x1ecab, 1},
Range32{0x1ecad, 0x1ecaf, 1},
Range32{0x1ecb1, 0x1ecb4, 1},
Range32{0x1ed01, 0x1ed2d, 1},
Range32{0x1ed2f, 0x1ed3d, 1},
Range32{0x1f100, 0x1f10c, 1},
Range32{0x1fbf0, 0x1fbf9, 1},
]
// The first 4 r16 entries all end at or below 0x00ff (max_latin_1).
// NOTE(review): latin_offset is read by is_excluding_latin, presumably to
// skip those Latin-1 entries - confirm against its implementation.
latin_offset: 4
}
struct RangeTable {
pub:
r16 []Range16
@ -1167,6 +1309,7 @@ pub:
stride u32
}
// tests if rune is in the given range table.
fn is_excluding_latin(table &RangeTable, r rune) bool {
r16 := &table.r16
off := table.latin_offset

View File

@ -178,6 +178,14 @@ pub fn is_space(r rune) bool {
return is_excluding_latin(white_space_table, r)
}
// is_number reports whether the rune `r` is a Unicode number, i.e. belongs to
// Unicode category N. Runes up to `max_latin_1` are answered from the `props`
// lookup using the `p_n` bit; all other runes are searched for in `number_table`.
pub fn is_number(r rune) bool {
	if r > max_latin_1 {
		return is_excluding_latin(number_table, r)
	}
	return (props[u8(r)] & p_n) != 0
}
// is_uchar_punct returns true if the input unicode is a western unicode punctuation
pub fn is_uchar_punct(uchar int) bool {
return find_punct_in_table(uchar, utf8.unicode_punct_western) != 0

View File

@ -105,3 +105,21 @@ fn test_is_space() {
assert utf8.is_space(`\u2009`) == true
assert utf8.is_space(`\u00A0`) == true
}
// test_is_number checks utf8.is_number against the ASCII letters (which must
// not be numbers), the ASCII digits, and a few code points covered by the
// 16-bit ranges of the number table.
fn test_is_number() {
	// V ranges exclude their upper bound, so the last element of every range
	// is asserted explicitly after the loop.
	for ra in `a` .. `z` {
		assert utf8.is_number(ra) == false
	}
	assert utf8.is_number(`z`) == false

	for ra in `A` .. `Z` {
		assert utf8.is_number(ra) == false
	}
	assert utf8.is_number(`Z`) == false

	// Cover every ASCII digit; the previous `0` .. `1` range only visited `0`.
	for ra in `0` .. `9` {
		assert utf8.is_number(ra) == true
	}
	assert utf8.is_number(`9`) == true

	assert utf8.is_number(`\u2164`) == true
	assert utf8.is_number(`\u2188`) == true
	assert utf8.is_number(`\u3029`) == true
}