From d7a64bbc8a2486700895f5cf1c4088af5e3758a4 Mon Sep 17 00:00:00 2001 From: ChAoS_UnItY <43753315+ChAoSUnItY@users.noreply.github.com> Date: Mon, 12 Apr 2021 10:58:03 +0800 Subject: [PATCH] encoding.utf8: fix len and ulen and optimize raw_index (#9682) --- vlib/encoding/utf8/utf8_util.v | 10 ++++++++++ vlib/encoding/utf8/utf8_util_test.v | 2 ++ 2 files changed, 12 insertions(+) diff --git a/vlib/encoding/utf8/utf8_util.v b/vlib/encoding/utf8/utf8_util.v index 0d8eff6746..5184c57e6b 100644 --- a/vlib/encoding/utf8/utf8_util.v +++ b/vlib/encoding/utf8/utf8_util.v @@ -15,6 +15,10 @@ Utility functions // len return the length as number of unicode chars from a string pub fn len(s string) int { + if s.len == 0 { + return 0 + } + mut count := 0 mut index := 0 @@ -82,10 +86,16 @@ pub fn get_uchar(s string, index int) int { // raw_index - get the raw chracter from the string by the given index value. // example: '我是V Lang'.raw_index(1) => '是' +// raw_index - get the raw character from the string by the given index value. +// example: utf8.raw_index('我是V Lang', 1) => '是' pub fn raw_index(s string, index int) string { mut r := []rune{} for i := 0; i < s.len; i++ { + if r.len - 1 == index { + break + } + b := s[i] ch_len := ((0xe5000000 >> ((b >> 3) & 0x1e)) & 3) diff --git a/vlib/encoding/utf8/utf8_util_test.v b/vlib/encoding/utf8/utf8_util_test.v index e2cbb8ea43..4ab3f96513 100644 --- a/vlib/encoding/utf8/utf8_util_test.v +++ b/vlib/encoding/utf8/utf8_util_test.v @@ -21,10 +21,12 @@ fn test_utf8_util() { assert lower1 == (src_lower.ustring()) // test len function + assert utf8.len('') == 0 assert utf8.len('pippo') == 5 assert utf8.len(src) == 15 // 29 assert src.len == 24 // 49 // test u_len function + assert utf8.u_len(''.ustring()) == 0 assert utf8.u_len(src1) == 15 // 29 assert utf8.u_len('pippo'.ustring()) == 5