1
0
mirror of https://github.com/vlang/v.git synced 2023-08-10 21:13:21 +03:00

move vlib/builtin/utf8_util.v to vlib/encoding/utf8/utf8_util.v

This commit is contained in:
Delyan Angelov 2019-11-30 07:52:37 +02:00 committed by Alexander Medvednikov
parent 661ddf3d1e
commit c7f8f2175b
2 changed files with 16 additions and 14 deletions

View File

@ -5,7 +5,7 @@
* This file contains utilities for utf8 strings
*
**********************************************************************/
module builtin
module utf8
//
// utf8_to_upper
@ -16,8 +16,8 @@ module builtin
//
// Convert a utf8 string to uppercase
//
pub fn (s string) utf8_to_upper() string {
return s.utf8_up_low(true)
pub fn to_upper(s string) string {
return up_low(s, true)
}
//
@ -29,8 +29,8 @@ pub fn (s string) utf8_to_upper() string {
//
// Convert a utf8 string to lowercase
//
pub fn (s string) utf8_to_lower() string {
return s.utf8_up_low(false)
pub fn to_lower(s string) string {
return up_low(s, false)
}
// Private function, calculate the length in bytes of a utf8 rune
@ -39,7 +39,7 @@ fn utf8util_char_len(b byte) int {
}
// Private function, does the dirty work of the case conversion
fn (s string) utf8_up_low(uppper_flag bool) string {
fn up_low(s string, uppper_flag bool) string {
mut _index := 0
mut old_index := 0
mut str_res := malloc(s.len + 1)
@ -110,8 +110,8 @@ fn (s string) utf8_up_low(uppper_flag bool) string {
//C.printf("Old char: %04x, New char: %04x, index: %d, offset: %d\n",unicode_con_table[ch_index],tab_char,ch_index,offset)
if ch_len == 2 {
ch0:=( (tab_char >> 6) & 0x1f ) | 0xc0 /*110x xxxx*/
ch1:=( (tab_char >> 0) & 0x3f ) | 0x80 /*10xx xxxx*/
ch0:=byte( (tab_char >> 6) & 0x1f ) | 0xc0 /*110x xxxx*/
ch1:=byte( (tab_char >> 0) & 0x3f ) | 0x80 /*10xx xxxx*/
//C.printf("[%02x%02x]",ch0,ch1)
str_res[ _index + 0 ] = ch0
@ -124,9 +124,9 @@ fn (s string) utf8_up_low(uppper_flag bool) string {
}
else if ch_len == 3 {
ch0:=( (tab_char >> 12) & 0x0f ) | 0xe0 /*1110 xxxx*/
ch1:=( (tab_char >> 6) & 0x3f ) | 0x80 /*10xx xxxx*/
ch2:=( (tab_char >> 0) & 0x3f ) | 0x80 /*10xx xxxx*/
ch0:=byte( (tab_char >> 12) & 0x0f ) | 0xe0 /*1110 xxxx*/
ch1:=byte( (tab_char >> 6) & 0x3f ) | 0x80 /*10xx xxxx*/
ch2:=byte( (tab_char >> 0) & 0x3f ) | 0x80 /*10xx xxxx*/
str_res[_index + 0 ] = ch0
str_res[_index + 1 ] = ch1
@ -852,4 +852,4 @@ u16(0x0061), 0x0041, // LATIN SMALL LETTER A LATIN CAPITAL LETTER A,
0xFF59, 0xFF39, // FULLWIDTH LATIN SMALL LETTER Y FULLWIDTH LATIN CAPITAL LETTER Y
0xFF5A, 0xFF3A, // FULLWIDTH LATIN SMALL LETTER Z FULLWIDTH LATIN CAPITAL LETTER Z
]
)
)

View File

@ -1,7 +1,9 @@
import encoding.utf8
fn test_utf8_util() {
src:="ăĂ ôÔ testo æ"
upper:=src.utf8_to_upper()
lower:=src.utf8_to_lower()
upper:=utf8.to_upper(src)
lower:=utf8.to_lower(src)
assert upper=="ĂĂ ÔÔ TESTO Æ"
assert lower=="ăă ôô testo æ"
}