1
0
mirror of https://github.com/vlang/v.git synced 2023-08-10 21:13:21 +03:00
v/vlib/encoding/utf8/utf8_util_test.v
2022-10-01 11:01:51 +03:00

126 lines
3.1 KiB
V
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import encoding.utf8
fn test_utf8_util() {
// string test
src := 'ăĂ ôÔ testo æ' //_\u1E5A\u1E5B<=>\u1F49\u1F41<=>\u0128\u012a\u012c" // len 29 runes, raw 49 bytes
src_upper := 'ĂĂ ÔÔ TESTO Æ' //_\u1E5A\u1E5A<=>\u1F49\u1F49<=>\u0128\u012A\u012C"
src_lower := 'ăă ôô testo æ' //_\u1E5B\u1E5B<=>\u1F41\u1F41<=>\u0129\u012B\u012D"
upper := utf8.to_upper(src)
lower := utf8.to_lower(src)
assert upper == src_upper
assert lower == src_lower
assert utf8.to_upper('абвёabc12') == 'АБВЁABC12'
assert utf8.to_lower('АБВЁABC12') == 'абвёabc12'
// test len function
assert utf8.len('') == 0
assert utf8.len('pippo') == 5
assert utf8.len(src) == 15 // 29
assert src.len == 24 // 49
// western punctuation
a := '.abc?abcòàè.'
assert utf8.is_punct(a, 0) == true
assert utf8.is_punct('b', 0) == false
assert utf8.is_uchar_punct(0x002E) == true
assert utf8.is_punct(a, 4) == true // ?
assert utf8.is_punct(a, 14) == true // last .
assert utf8.is_punct(a, 12) == false // è
println('OK western')
// global punctuation
b := '.ĂĂa. ÔÔ TESTO Æ'
assert utf8.is_global_punct(b, 0) == true
assert utf8.is_global_punct('.', 0) == true
assert utf8.is_uchar_punct(0x002E) == true
assert utf8.is_global_punct(b, 6) == true // .
assert utf8.is_global_punct(b, 1) == false // a
// test utility functions
assert utf8.get_uchar(b, 0) == 0x002E
}
fn test_raw_indexing() {
a := 'V Lang!'
// test non ascii characters
assert utf8.raw_index(a, 0) == ''
assert utf8.raw_index(a, 1) == ''
// test ascii characters
assert utf8.raw_index(a, 2) == 'V'
assert utf8.raw_index(a, 3) == ' '
assert utf8.raw_index(a, 4) == 'L'
assert utf8.raw_index(a, 5) == 'a'
assert utf8.raw_index(a, 6) == 'n'
assert utf8.raw_index(a, 7) == 'g'
assert utf8.raw_index(a, 8) == '!'
}
fn test_reversed() {
a := 'V Lang!'
b := 'hello world'
assert utf8.reverse(a) == '!gnaL V'
assert utf8.reverse(b) == 'dlrow olleh'
}
fn test_is_control() {
for ra in `a` .. `z` {
assert utf8.is_control(ra) == false
}
for ra in `A` .. `Z` {
assert utf8.is_control(ra) == false
}
assert utf8.is_control('\x01'.runes()[0]) == true
assert utf8.is_control('\u0100'.runes()[0]) == false
}
fn test_is_letter() {
for ra in `a` .. `z` {
assert utf8.is_letter(ra) == true
}
for ra in `A` .. `Z` {
assert utf8.is_letter(ra) == true
}
assert utf8.is_letter(`ɀ`) == true
assert utf8.is_letter(`ȶ`) == true
assert utf8.is_letter(`ȹ`) == true
}
fn test_is_space() {
for ra in `a` .. `z` {
assert utf8.is_space(ra) == false
}
for ra in `A` .. `Z` {
assert utf8.is_space(ra) == false
}
assert utf8.is_space(`\u202f`) == true
assert utf8.is_space(`\u2009`) == true
assert utf8.is_space(`\u00A0`) == true
}
fn test_is_number() {
for ra in `a` .. `z` {
assert utf8.is_number(ra) == false
}
for ra in `A` .. `Z` {
assert utf8.is_number(ra) == false
}
for ra in `0` .. `1` {
assert utf8.is_number(ra) == true
}
assert utf8.is_number(`\u2164`) == true
assert utf8.is_number(`\u2188`) == true
assert utf8.is_number(`\u3029`) == true
}