From acb58d4923e38cd3726b08c30890482dbd8d3ef4 Mon Sep 17 00:00:00 2001 From: ChAoS_UnItY <43753315+ChAoSUnItY@users.noreply.github.com> Date: Sun, 11 Apr 2021 14:04:18 +0800 Subject: [PATCH] encoding.utf8: add support for indexing a utf8 str (#9670) --- vlib/encoding/utf8/utf8_util.v | 21 +++++++++++++++++++++ vlib/encoding/utf8/utf8_util_test.v | 17 +++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/vlib/encoding/utf8/utf8_util.v b/vlib/encoding/utf8/utf8_util.v index fe9e99a7a2..ca1d517a74 100644 --- a/vlib/encoding/utf8/utf8_util.v +++ b/vlib/encoding/utf8/utf8_util.v @@ -87,6 +87,27 @@ pub fn get_uchar(s string, index int) int { } +// raw_index - get the raw character from the string by the given index value; the index counts UTF-8 characters, not bytes. +// example: utf8.raw_index('我是V Lang', 1) => '是' + +pub fn raw_index(s string, index int) string { + mut r := []rune{} + + for i := 0; i < s.len; i++ { + b := s[i] + ch_len := ((0xe5000000>>((b>>3) & 0x1e)) & 3) + + r << if ch_len > 0 { + i += ch_len + rune(get_uchar(s,i-ch_len)) + } else { + rune(b) + } + } + + return r[index].str() +} + /* Conversion functions diff --git a/vlib/encoding/utf8/utf8_util_test.v b/vlib/encoding/utf8/utf8_util_test.v index 646bd69a7f..e26846c261 100644 --- a/vlib/encoding/utf8/utf8_util_test.v +++ b/vlib/encoding/utf8/utf8_util_test.v @@ -50,3 +50,20 @@ fn test_utf8_util() { // test utility functions assert utf8.get_uchar(b,0)==0x002E } + +fn test_raw_indexing() { + a := "我是V Lang!" + + // test non ascii characters + assert utf8.raw_index(a, 0) == '我' + assert utf8.raw_index(a, 1) == '是' + + // test ascii characters + assert utf8.raw_index(a, 2) == 'V' + assert utf8.raw_index(a, 3) == ' ' + assert utf8.raw_index(a, 4) == 'L' + assert utf8.raw_index(a, 5) == 'a' + assert utf8.raw_index(a, 6) == 'n' + assert utf8.raw_index(a, 7) == 'g' + assert utf8.raw_index(a, 8) == '!' +}