2022-01-04 12:21:08 +03:00
// Copyright (c) 2019-2022 Alexander Medvednikov. All rights reserved.
2019-06-23 05:21:30 +03:00
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
2019-06-22 21:20:28 +03:00
module builtin
2019-11-01 00:28:52 +03:00
import strconv
2020-12-03 18:02:48 +03:00
2019-09-28 13:54:30 +03:00
/*
Note: A V string should be/is immutable from the point of view of
    V user programs after it is first created. A V string is
    also slightly larger than the equivalent C string because
    the V string also has an integer length attached.

    This tradeoff is made, since V strings are created just *once*,
    but potentially used *many times* over their lifetime.

    The V string implementation uses a struct, that has a .str field,
    which points to a C style 0 terminated memory block. Although not
    strictly necessary from the V point of view, that additional 0
    is *very useful for C interoperability*.

    The V string implementation also has an integer .len field,
    containing the length of the .str field, excluding the
    terminating 0 (just like the C's strlen(s) would do).

    The 0 ending of .str, and the .len field, mean that in practice:
      a) a V string s can be used very easily, wherever a
         C string is needed, just by passing s.str,
         without a need for further conversion/copying.

      b) where strlen(s) is needed, you can just pass s.len,
         without having to constantly recompute the length of s
         *over and over again* like some C programs do. This is because
         V strings are immutable and so their length does not change.

    Ordinary V code *does not need* to be concerned with the
    additional 0 in the .str field. The 0 *must* be put there by the
    low level string creating functions inside this module.

    Failing to do this will lead to programs that work most of the
    time, when used with pure V functions, but fail in strange ways,
    when used with modules using C functions (for example os and so on).
*/
2019-10-24 12:47:21 +03:00
pub struct string {
pub:
	// str points to a C style, 0 terminated block of bytes.
	str &u8 = 0
	// len is the number of bytes in `.str`, excluding the ending 0 byte.
	// It is always equal to strlen(.str).
	len int
mut:
	// is_lit is an enumeration of the following values:
	//   0         => a fresh string, that should be freed by autofree
	//   1         => a literal string from .rodata, that should NOT be freed
	//   -98761234 => an already freed string; the value protects against
	//                double frees, calling free on such a string is a bug.
	// Any other value means that the string has been corrupted.
	is_lit int
}
2020-12-03 18:02:48 +03:00
2021-10-29 15:49:30 +03:00
// runes decodes the string `s` into an array with one rune per UTF-8
// character. Use it when you need random access to the characters of `s`.
[direct_array_access]
pub fn (s string) runes() []rune {
	mut res := []rune{cap: s.len}
	mut pos := 0
	for pos < s.len {
		first := unsafe { s.str[pos] }
		width := utf8_char_len(first)
		if width <= 1 {
			// single byte character: the byte itself is the rune
			res << first
			pos++
			continue
		}
		// A sequence cut short by the end of the string is decoded
		// from whatever bytes are left.
		mut stop := pos + width
		if stop > s.len - 1 {
			stop = s.len
		}
		chunk := unsafe { s[pos..stop] }
		res << chunk.utf32_code()
		pos += width
	}
	return res
}
2021-12-02 16:46:53 +03:00
// cstring_to_vstring returns a new V string, holding a *copy* of the
// 0 terminated C string pointed to by `s` (i.e. a `char*`).
// This is most likely the function that you want, when you work with
// C style string pointers. It is recommended to use it, unless you *do*
// understand the implications of tos/tos2/tos3/tos4/tos5 in terms of memory
// management and interactions with -autofree and `[manualfree]`.
// It will panic, if the pointer `s` is 0.
[unsafe]
pub fn cstring_to_vstring(s &char) string {
	unsafe {
		// tos2 only wraps the C memory; .clone() makes the independent copy
		return tos2(&u8(s)).clone()
	}
}
// tos_clone returns a new V string, holding a *copy* of the 0 terminated
// C string pointed to by `s`.
// It is the same as cstring_to_vstring, except that it accepts a `&u8`
// pointer, while cstring_to_vstring accepts a `&char` one.
// It will panic, if the pointer `s` is 0.
[unsafe]
pub fn tos_clone(s &u8) string {
	unsafe {
		// tos2 only wraps the C memory; .clone() makes the independent copy
		return tos2(s).clone()
	}
}
// tos creates a V string from the C style pointer `s` and the given `len`.
// Note: the memory block pointed by s is *reused, not copied*!
// `len` is stored as-is; it is not checked against the data behind `s`,
// so the caller must pass the number of bytes before the terminating 0.
// It will panic, when the pointer `s` is 0.
// See also `tos_clone`.
[unsafe]
pub fn tos(s &u8, len int) string {
	if s == 0 {
		panic('tos(): nil string')
	}
	return string{
		str: unsafe { s }
		len: len
	}
}
2021-12-02 16:46:53 +03:00
// tos2 creates a V string, given a C style pointer to a 0 terminated block.
// Note: the memory block pointed by s is *reused, not copied*!
// It calculates the length first (with vstrlen), thus it is more costly
// than `tos`.
// It will panic, when the pointer `s` is 0.
// It is the same as `tos3`, but for &u8 pointers, avoiding callsite casts.
// See also `tos_clone`.
[unsafe]
pub fn tos2(s &u8) string {
	if s == 0 {
		panic('tos2: nil string')
	}
	return string{
		str: unsafe { s }
		len: unsafe { vstrlen(s) }
	}
}
2021-12-02 16:46:53 +03:00
// tos3 creates a V string, given a C style pointer to a 0 terminated block.
// Note: the memory block pointed by s is *reused, not copied*!
// It calculates the length first (with vstrlen_char), so it is more costly
// than `tos`.
// It will panic, when the pointer `s` is 0.
// It is the same as `tos2`, but for &char pointers, avoiding callsite casts.
// See also `tos_clone`.
[unsafe]
pub fn tos3(s &char) string {
	if s == 0 {
		panic('tos3: nil string')
	}
	return string{
		str: unsafe { &u8(s) }
		len: unsafe { vstrlen_char(s) }
	}
}
2021-12-02 16:46:53 +03:00
// tos4 creates a V string, given a C style pointer to a 0 terminated block.
// Note: the memory block pointed by s is *reused, not copied*!
// It calculates the length first (with vstrlen), so it is more costly
// than `tos`.
// It returns '' (the empty string), when given a 0 pointer `s`,
// it does NOT panic.
// It is the same as `tos5`, but for &u8 pointers, avoiding callsite casts.
// See also `tos_clone`.
[unsafe]
pub fn tos4(s &u8) string {
	if s == 0 {
		return ''
	}
	return string{
		str: unsafe { s }
		len: unsafe { vstrlen(s) }
	}
}
2021-12-02 16:46:53 +03:00
// tos5 creates a V string, given a C style pointer to a 0 terminated block.
// Note: the memory block pointed by s is *reused, not copied*!
// It calculates the length first (with vstrlen_char), so it is more costly
// than `tos`.
// It returns '' (the empty string), when given a 0 pointer `s`,
// it does NOT panic.
// It is the same as `tos4`, but for &char pointers, avoiding callsite casts.
// See also `tos_clone`.
[unsafe]
pub fn tos5(s &char) string {
	if s == 0 {
		return ''
	}
	return string{
		str: unsafe { &u8(s) }
		len: unsafe { vstrlen_char(s) }
	}
}
2021-12-02 16:46:53 +03:00
// vstring wraps the 0 terminated C string pointed to by `bp` in a V string.
// Note: the memory block pointed by `bp` is *reused, not copied*!
// Note: instead of `&u8(arr.data).vstring()`, do use `tos_clone(&u8(arr.data))`.
// Apart from that, the result is a normal V string (i.e. it would be freed
// by V's -autofree mechanism, when it is no longer used).
// See also `tos_clone`.
[unsafe]
pub fn (bp &u8) vstring() string {
	n := unsafe { vstrlen(bp) }
	return string{
		str: unsafe { bp }
		len: n
	}
}
2021-12-02 16:46:53 +03:00
// vstring_with_len wraps the C string pointed to by `bp` in a V string,
// that has the given length `len`.
// Note: the memory block pointed by `bp` is *reused, not copied*!
// This method has lower overhead compared to .vstring(), since it
// does not need to calculate the length of the 0 terminated string.
// See also `tos_clone`.
[unsafe]
pub fn (bp &u8) vstring_with_len(len int) string {
	data := unsafe { bp }
	return string{
		str: data
		len: len
		is_lit: 0
	}
}
2020-05-07 19:05:54 +03:00
2021-12-02 16:46:53 +03:00
// vstring wraps the 0 terminated C string pointed to by `cp` in a V string.
// Note: the memory block pointed by `cp` is *reused, not copied*!
// Apart from that, the result is a normal V string (i.e. it would be freed
// by V's -autofree mechanism, when it is no longer used).
// Note: instead of `&u8(a.data).vstring()`, use `tos_clone(&u8(a.data))`.
// See also `tos_clone`.
[unsafe]
pub fn (cp &char) vstring() string {
	n := unsafe { vstrlen_char(cp) }
	return string{
		str: &u8(cp)
		len: n
		is_lit: 0
	}
}
2021-12-02 16:46:53 +03:00
// vstring_with_len wraps the C string pointed to by `cp` in a V string,
// that has the given length `len`.
// Note: the memory block pointed by `cp` is *reused, not copied*!
// This method has lower overhead compared to .vstring(), since it
// does not calculate the length of the 0 terminated string.
// See also `tos_clone`.
[unsafe]
pub fn (cp &char) vstring_with_len(len int) string {
	data := &u8(cp)
	return string{
		str: data
		len: len
		is_lit: 0
	}
}
// vstring_literal wraps the 0 terminated C string pointed to by `bp`
// in a V string, that is marked as a literal.
// Note: the memory block pointed by `bp` is *reused, not copied*!
// Note: unlike vstring, vstring_literal sets `is_lit` to 1, so the result
// will not be freed by -autofree.
// This is suitable for readonly strings, C string literals etc, that can be
// read by the V program, but that should not be managed/freed by it,
// for example `os.args` is implemented using it.
// See also `tos_clone`.
[unsafe]
pub fn (bp &u8) vstring_literal() string {
	n := unsafe { vstrlen(bp) }
	return string{
		str: unsafe { bp }
		len: n
		is_lit: 1
	}
}
// vstring_literal_with_len wraps the C string pointed to by `bp` in a
// V string with the given length `len`, that is marked as a literal
// (`is_lit` is set to 1).
// Note: the memory block pointed by `bp` is *reused, not copied*!
// This method has lower overhead compared to .vstring_literal(), since it
// does not need to calculate the length of the 0 terminated string.
// See also `tos_clone`.
[unsafe]
pub fn (bp &u8) vstring_literal_with_len(len int) string {
	data := unsafe { bp }
	return string{
		str: data
		len: len
		is_lit: 1
	}
}
2021-12-02 16:46:53 +03:00
// vstring_literal wraps the 0 terminated C string, pointed to by the
// char* pointer `cp`, in a V string that is marked as a literal
// (`is_lit` is set to 1).
// Note: the memory block pointed by `cp` is *reused, not copied*!
// See also `byteptr.vstring_literal` for more details.
// See also `tos_clone`.
[unsafe]
pub fn (cp &char) vstring_literal() string {
	n := unsafe { vstrlen_char(cp) }
	return string{
		str: &u8(cp)
		len: n
		is_lit: 1
	}
}
2021-12-02 16:46:53 +03:00
// vstring_literal_with_len wraps the C string, pointed to by the char*
// pointer `cp`, in a V string with the given length `len`, that is marked
// as a literal (`is_lit` is set to 1).
// Note: the memory block pointed by `cp` is *reused, not copied*!
// This method has lower overhead compared to .vstring_literal(), since it
// does not need to calculate the length of the 0 terminated string.
// See also `tos_clone`.
[unsafe]
pub fn (cp &char) vstring_literal_with_len(len int) string {
	data := &u8(cp)
	return string{
		str: data
		len: len
		is_lit: 1
	}
}
2022-04-29 10:23:57 +03:00
// len_utf8 returns the number of runes contained in the string `s`.
pub fn (s string) len_utf8() int {
	mut count := 0
	mut pos := 0
	for pos < s.len {
		lead := unsafe { s.str[pos] }
		// 0xe5000000 is a packed table of 2 bit entries, indexed by the top
		// 4 bits of the lead byte; each entry is the number of bytes that
		// follow it: 0xF. => 3, 0xE. => 2, 0xC./0xD. => 1, all others => 0.
		extra := (0xe5000000 >> ((lead >> 3) & 0x1e)) & 3
		pos += extra + 1
		count++
	}
	return count
}
2020-12-12 13:10:29 +03:00
// clone_static returns an independent copy of the given string `a`.
// It should be used only in -autofree generated code.
fn (a string) clone_static() string {
	copy := a.clone()
	return copy
}
2020-12-12 13:10:29 +03:00
// clone returns a copy of the V string `a`.
// The copy has its own freshly allocated, 0 terminated buffer.
// An empty `a` yields the empty string literal '', without allocating.
pub fn (a string) clone() string {
	if a.len == 0 {
		return ''
	}
	mut b := string{
		// + 1 for the terminating 0 byte
		str: unsafe { malloc_noscan(a.len + 1) }
		len: a.len
	}
	unsafe {
		vmemcpy(b.str, a.str, a.len)
		b.str[a.len] = 0
	}
	return b
}
2020-12-12 13:10:29 +03:00
// replace_once replaces the first occurrence of `rep` with the string
// passed in `with`. When `rep` is not found, a copy of `s` is returned.
pub fn (s string) replace_once(rep string, with string) string {
	pos := s.index_(rep)
	if pos == -1 {
		return s.clone()
	}
	after := pos + rep.len
	// head + replacement + tail
	return s.substr(0, pos) + with + s.substr(after, s.len)
}
2019-06-22 21:20:28 +03:00
2020-12-12 13:10:29 +03:00
// replace replaces all occurrences of `rep` with the string passed in `with`.
// A copy of `s` is returned unchanged, when `s` or `rep` is empty,
// or when `rep` does not occur in `s`.
[direct_array_access]
pub fn (s string) replace(rep string, with string) string {
	if s.len == 0 || rep.len == 0 || rep.len > s.len || !s.contains(rep) {
		return s.clone()
	}
	// TODO PERF Allocating ints is expensive. Should be a stack array
	// Collect the start positions of all non overlapping matches:
	mut positions := []int{cap: s.len / rep.len}
	defer {
		unsafe { positions.free() }
	}
	mut from := 0
	for {
		from = s.index_after(rep, from)
		if from == -1 {
			break
		}
		positions << from
		from += rep.len
	}
	// Dont change the string if there's nothing to replace
	if positions.len == 0 {
		return s.clone()
	}
	// The number of replacements is known, so the result size is too:
	new_len := s.len + positions.len * (with.len - rep.len)
	mut b := unsafe { malloc_noscan(new_len + 1) } // + 1 for the ending 0 byte
	mut out := 0 // write position in b
	mut src := 0 // read position in s
	for match_pos in positions {
		// copy everything up to the piece being replaced
		for src < match_pos {
			unsafe {
				b[out] = s[src]
			}
			out++
			src++
		}
		// move the read position past the replaced piece
		src = match_pos + rep.len
		// copy the replacement
		for k := 0; k < with.len; k++ {
			unsafe {
				b[out] = with[k]
			}
			out++
		}
	}
	// copy what is left of the original after the last replacement
	for src < s.len {
		unsafe {
			b[out] = s[src]
		}
		out++
		src++
	}
	unsafe {
		b[new_len] = 0
		return tos(b, new_len)
	}
}
// RepIndex records one match found by string.replace_each.
struct RepIndex {
	// idx is the byte position of the match in the source string.
	idx int
	// val_idx is the index in `vals` of the `rep` string that matched;
	// its replacement is at `val_idx + 1`.
	val_idx int
}
2020-12-12 13:10:29 +03:00
// replace_each replaces all occurrences of the string pairs given in `vals`.
// `vals` is a flat list of pairs: ['rep1', 'with1', 'rep2', 'with2', ...].
// A copy of `s` is returned unchanged when `s` or `vals` is empty, when
// nothing matches, or (after printing a message on stderr) when `vals` has
// an odd number of elements. Pairs with an empty `rep` are skipped.
// Example: assert 'ABCD'.replace_each(['B','C/','C','D','D','C']) == 'AC/DC'
[direct_array_access]
pub fn (s string) replace_each(vals []string) string {
	if s.len == 0 || vals.len == 0 {
		return s.clone()
	}
	if vals.len % 2 != 0 {
		eprintln('string.replace_each(): odd number of strings')
		return s.clone()
	}
	// `rep` - string to replace
	// `with` - string to replace with
	// Remember positions of all rep strings, and calculate the length
	// of the new string to do just one allocation.
	mut new_len := s.len
	mut idxs := []RepIndex{cap: 6}
	// s_ is a scratch copy, in which the found matches are overwritten,
	// so that later pairs can not match the same bytes again.
	s_ := s.clone()
	for rep_i := 0; rep_i < vals.len; rep_i += 2 {
		// vals: ['rep1, 'with1', 'rep2', 'with2']
		rep := vals[rep_i]
		with := vals[rep_i + 1]
		if rep.len == 0 {
			// An empty `rep` can never advance the search position
			// (idx += 0), so the search below would not terminate.
			continue
		}
		// Each pair is searched from the start of the string.
		mut idx := 0
		for {
			idx = s_.index_after(rep, idx)
			if idx == -1 {
				break
			}
			// The string already found is set to `/del`, to avoid duplicate searches.
			for i in 0 .. rep.len {
				unsafe {
					s_.str[idx + i] = 127
				}
			}
			// We need to remember both the position in the string,
			// and which rep/with pair it refers to.
			idxs << RepIndex{
				idx: idx
				val_idx: rep_i
			}
			idx += rep.len
			new_len += with.len - rep.len
		}
	}
	// Dont change the string if there's nothing to replace
	if idxs.len == 0 {
		return s.clone()
	}
	// Matches were collected pair by pair; the fill loop below needs them
	// ordered by their position in the string.
	idxs.sort(a.idx < b.idx)
	mut b := unsafe { malloc_noscan(new_len + 1) } // add space for 0 terminator
	// Fill the new string
	mut idx_pos := 0
	mut cur_idx := idxs[idx_pos]
	mut b_i := 0
	for i := 0; i < s.len; i++ {
		if i == cur_idx.idx {
			// Reached the location of rep, replace it with "with"
			rep := vals[cur_idx.val_idx]
			with := vals[cur_idx.val_idx + 1]
			for j in 0 .. with.len {
				unsafe {
					b[b_i] = with[j]
				}
				b_i++
			}
			// Skip the length of rep, since we just replaced it with "with"
			// (- 1, because the loop itself does i++)
			i += rep.len - 1
			// Go to the next index
			idx_pos++
			if idx_pos < idxs.len {
				cur_idx = idxs[idx_pos]
			}
		} else {
			// Rep doesnt start here, just copy
			unsafe {
				b[b_i] = s.str[i]
			}
			b_i++
		}
	}
	unsafe {
		b[new_len] = 0
		return tos(b, new_len)
	}
}
2022-07-27 22:04:39 +03:00
// replace_char replaces all occurrences of the character `rep` with
// `repeat` occurrences of the character passed in `with`.
// It panics when `repeat` <= 0, unless the program is compiled with
// `-d no_bounds_checking`.
// Example: assert '\tHello!'.replace_char(`\t`,` `,8) == '        Hello!'
[direct_array_access]
pub fn (s string) replace_char(rep u8, with u8, repeat int) string {
	$if !no_bounds_checking ? {
		if repeat <= 0 {
			panic('string.replace_char(): tab length too short')
		}
	}
	if s.len == 0 {
		return s.clone()
	}
	// TODO Allocating ints is expensive. Should be a stack array
	// - string.replace()
	mut idxs := []int{cap: s.len}
	defer {
		unsafe { idxs.free() }
	}
	// No need to do a contains(), it already traverses the entire string
	for i, ch in s {
		if ch == rep { // Found char? Mark its location
			idxs << i
		}
	}
	if idxs.len == 0 {
		return s.clone()
	}
	// Now we know the number of replacements we need to do and we can calc the len of the new string
	new_len := s.len + idxs.len * (repeat - 1)
	mut b := unsafe { malloc_noscan(new_len + 1) } // add space for the null byte at the end
	// Fill the new string
	mut b_i := 0
	mut s_idx := 0
	for rep_pos in idxs {
		for i in s_idx .. rep_pos { // copy everything up to piece being replaced
			unsafe {
				b[b_i] = s[i]
			}
			b_i++
		}
		s_idx = rep_pos + 1 // move string index past replacement
		for _ in 0 .. repeat { // copy replacement piece
			unsafe {
				b[b_i] = with
			}
			b_i++
		}
	}
	if s_idx < s.len { // if any original after last replacement, copy it
		for i in s_idx .. s.len {
			unsafe {
				b[b_i] = s[i]
			}
			b_i++
		}
	}
	unsafe {
		b[new_len] = 0
		return tos(b, new_len)
	}
}
// normalize_tabs replaces all tab characters with `tab_len` amount of spaces.
// It is a thin wrapper around `replace_char`.
// Example: assert '\t\tpop rax\t; pop rax'.normalize_tabs(2) == '    pop rax  ; pop rax'
[inline]
pub fn (s string) normalize_tabs(tab_len int) string {
	return s.replace_char(`\t`, ` `, tab_len)
}
2020-12-12 13:10:29 +03:00
// bool returns `true` if the string equals the word "true" (or the
// abbreviation "t"); it will return `false` otherwise.
pub fn (s string) bool() bool {
	return s == 'true' || s == 't' // TODO t for pg, remove
}
2019-10-25 23:41:18 +03:00
2020-12-12 13:10:29 +03:00
// int returns the value of the string as an integer `'1'.int() == 1`.
// It returns 0, when strconv.common_parse_int reports an error.
pub fn (s string) int() int {
	value := strconv.common_parse_int(s, 0, 32, false, false) or { 0 }
	return int(value)
}
2019-06-25 11:04:02 +03:00
2020-12-12 13:10:29 +03:00
// i64 returns the value of the string as i64 `'1'.i64() == i64(1)`.
// It returns 0, when strconv.common_parse_int reports an error.
pub fn (s string) i64() i64 {
	value := strconv.common_parse_int(s, 0, 64, false, false) or { 0 }
	return value
}
2020-12-12 13:10:29 +03:00
// i8 returns the value of the string as i8 `'1'.i8() == i8(1)`.
// It returns 0, when strconv.common_parse_int reports an error.
pub fn (s string) i8() i8 {
	value := strconv.common_parse_int(s, 0, 8, false, false) or { 0 }
	return i8(value)
}
2020-12-12 13:10:29 +03:00
// i16 returns the value of the string as i16 `'1'.i16() == i16(1)`.
// It returns 0, when strconv.common_parse_int reports an error.
pub fn (s string) i16() i16 {
	value := strconv.common_parse_int(s, 0, 16, false, false) or { 0 }
	return i16(value)
}
2020-12-12 13:10:29 +03:00
// f32 returns the value of the string as f32 `'1.0'.f32() == f32(1)`.
// The string is parsed as f64 by strconv.atof64, then narrowed to f32.
// It returns 0, when strconv.atof64 reports an error.
pub fn (s string) f32() f32 {
	value := strconv.atof64(s) or { 0 }
	return f32(value)
}
2020-12-12 13:10:29 +03:00
// f64 returns the value of the string as f64 `'1.0'.f64() == f64(1)`.
// It returns 0, when strconv.atof64 reports an error.
pub fn (s string) f64() f64 {
	value := strconv.atof64(s) or { 0 }
	return value
}
2021-09-28 10:08:10 +03:00
// u8 returns the value of the string as u8 `'1'.u8() == u8(1)`.
// It returns 0, when strconv.common_parse_uint reports an error.
pub fn (s string) u8() u8 {
	value := strconv.common_parse_uint(s, 0, 8, false, false) or { 0 }
	return u8(value)
}
2020-12-12 13:10:29 +03:00
// u16 returns the value of the string as u16 `'1'.u16() == u16(1)`.
// It returns 0, when strconv.common_parse_uint reports an error.
pub fn (s string) u16() u16 {
	value := strconv.common_parse_uint(s, 0, 16, false, false) or { 0 }
	return u16(value)
}
2020-12-12 13:10:29 +03:00
// u32 returns the value of the string as u32 `'1'.u32() == u32(1)`.
// It returns 0, when strconv.common_parse_uint reports an error.
pub fn (s string) u32() u32 {
	value := strconv.common_parse_uint(s, 0, 32, false, false) or { 0 }
	return u32(value)
}
2020-12-12 13:10:29 +03:00
// u64 returns the value of the string as u64 `'1'.u64() == u64(1)`.
// It returns 0, when strconv.common_parse_uint reports an error.
pub fn (s string) u64() u64 {
	value := strconv.common_parse_uint(s, 0, 64, false, false) or { 0 }
	return value
}
2022-05-07 20:18:42 +03:00
// parse_uint is like `parse_int` but for unsigned numbers.
//
// This method directly exposes the `parse_uint` function from `strconv`
// as a method on `string`. Any error from `strconv.parse_uint` is passed
// on to the caller. For more advanced features,
// consider calling `strconv.common_parse_uint` directly.
pub fn (s string) parse_uint(_base int, _bit_size int) !u64 {
	value := strconv.parse_uint(s, _base, _bit_size)!
	return value
}
2022-01-14 18:27:38 +03:00
// parse_int interprets a string s in the given base (0, 2 to 36) and
// bit size (0 to 64) and returns the corresponding value i.
//
// If the base argument is 0, the true base is implied by the string's
// prefix: 2 for "0b", 8 for "0" or "0o", 16 for "0x", and 10 otherwise.
// Also, for argument base 0 only, underscore characters are permitted
// as defined by the Go syntax for integer literals.
//
// The `_bit_size` argument specifies the integer type
// that the result must fit into. Bit sizes 0, 8, 16, 32, and 64
// correspond to int, int8, int16, int32, and int64.
// If `_bit_size` is below 0 or above 64, an error is returned.
//
// This method directly exposes the `parse_int` function from `strconv`
// as a method on `string`. Any error from `strconv.parse_int` is passed
// on to the caller. For more advanced features,
// consider calling `strconv.common_parse_int` directly.
pub fn (s string) parse_int(_base int, _bit_size int) !i64 {
	value := strconv.parse_int(s, _base, _bit_size)!
	return value
}
2021-05-23 17:22:57 +03:00
// == returns true, when `s` and `a` have the same length and the same bytes.
// It panics when `s.str` is 0, which should never happen.
[direct_array_access]
fn (s string) == (a string) bool {
	if s.str == 0 {
		// should never happen
		panic('string.eq(): nil string')
	}
	if s.len != a.len {
		return false
	}
	if s.len > 0 {
		// Cheap early exit: compare the last bytes first, before doing
		// the full memory comparison below.
		last_idx := s.len - 1
		if s[last_idx] != a[last_idx] {
			return false
		}
	}
	unsafe {
		return vmemcmp(s.str, a.str, a.len) == 0
	}
}
2021-10-28 15:09:41 +03:00
// compare does a bytewise comparison of `s` and `a`.
// It returns -1 if `s` < `a`, 0 if `s` == `a`, and 1 if `s` > `a`.
// When one string is a prefix of the other, the shorter one is the smaller.
[direct_array_access]
pub fn (s string) compare(a string) int {
	mut common := a.len
	if s.len < a.len {
		common = s.len
	}
	for i := 0; i < common; i++ {
		if s[i] != a[i] {
			// the first differing byte decides
			return if s[i] < a[i] { -1 } else { 1 }
		}
	}
	if s.len == a.len {
		return 0
	}
	return if s.len < a.len { -1 } else { 1 }
}
// < returns true, when `s` sorts before `a` in a bytewise comparison.
// A string that is a proper prefix of `a` is smaller than `a`.
[direct_array_access]
fn (s string) < (a string) bool {
	for i := 0; i < s.len; i++ {
		if i >= a.len {
			// `a` is a proper prefix of `s`, so `s` is the bigger one
			return false
		}
		if s[i] != a[i] {
			// the first differing byte decides
			return s[i] < a[i]
		}
	}
	// all bytes of `s` matched; it is smaller only if `a` has more bytes
	return s.len < a.len
}
2021-10-28 15:09:41 +03:00
// + returns a newly allocated string, that holds the bytes of `s`,
// followed by the bytes of `a`, and a terminating 0 byte.
[direct_array_access]
fn (s string) + (a string) string {
	total := a.len + s.len
	mut res := string{
		// + 1 for the terminating 0 byte
		str: unsafe { malloc_noscan(total + 1) }
		len: total
	}
	unsafe {
		for i := 0; i < s.len; i++ {
			res.str[i] = s.str[i]
		}
		for i := 0; i < a.len; i++ {
			res.str[s.len + i] = a.str[i]
		}
		// keep the result usable as a C string too
		res.str[total] = 0
	}
	return res
}
2021-12-04 21:13:40 +03:00
// split_any splits the string to an array by any of the `delim` chars.
// Each byte of `delim` is treated as a separate delimiter.
// Example: "first row\nsecond row".split_any(" \n") == ['first', 'row', 'second', 'row']
// If the delimiter string is empty then `.split('')` is used, i.e. the
// string is split into its characters.
// An empty source string yields an empty array. No trailing empty element
// is added, when the string ends with a delimiter.
[direct_array_access]
pub fn (s string) split_any(delim string) []string {
	mut res := []string{}
	// check empty source string
	if s.len == 0 {
		return res
	}
	// if empty delimiter string, use the default split
	if delim.len <= 0 {
		return s.split('')
	}
	mut i := 0 // start of the part that is currently collected
	for index, ch in s {
		for delim_ch in delim {
			if ch == delim_ch {
				res << s[i..index]
				i = index + 1
				break
			}
		}
	}
	// the remaining part after the last delimiter
	if i < s.len {
		res << s[i..]
	}
	return res
}
2020-12-12 13:10:29 +03:00
// split breaks the string into parts, using `delim` as the separator, and returns all of them.
// It is a shorthand for `s.split_nth(delim, 0)`.
// Example: assert 'A B C'.split(' ') == ['A','B','C']
// If `delim` is empty, the string is split into its individual bytes.
// Example: assert 'DEF'.split('') == ['D','E','F']
pub fn (s string) split(delim string) []string {
	all_parts := s.split_nth(delim, 0)
	return all_parts
}
2020-12-12 13:10:29 +03:00
// split_nth splits the string based on the passed `delim` substring.
// It returns at most `nth` parts. When `nth` is 0 (or negative), all the splits are returned.
// When the limit is reached, the last returned element has the remainder of the string,
// even if the remainder contains more `delim` substrings.
// An empty `delim` splits the string into its individual bytes.
// Example: assert 'a,b,c'.split_nth(',', 2) == ['a', 'b,c']
// Example: assert 'abc'.split_nth('', 2) == ['a', 'bc']
[direct_array_access]
pub fn (s string) split_nth(delim string, nth int) []string {
	mut res := []string{}
	mut i := 0
	match delim.len {
		0 {
			// `i` is the 1 based number of the byte that is being visited
			i = 1
			for ch in s {
				if nth > 0 && i >= nth {
					// The current byte has index `i - 1`; the remainder has to start
					// there, otherwise that byte would be lost from the result.
					res << s[i - 1..]
					break
				}
				res << ch.ascii_str()
				i++
			}
			return res
		}
		1 {
			// single byte delimiter: compare bytes directly
			mut start := 0
			delim_byte := delim[0]
			for i < s.len {
				if s[i] == delim_byte {
					was_last := nth > 0 && res.len == nth - 1
					if was_last {
						break
					}
					val := s.substr(start, i)
					res << val
					start = i + delim.len
					i = start
				} else {
					i++
				}
			}
			// Then the remaining right part of the string
			if nth < 1 || res.len < nth {
				res << s[start..]
			}
			return res
		}
		else {
			mut start := 0
			// Take the left part for each delimiter occurrence
			for i <= s.len {
				is_delim := i + delim.len <= s.len && s.substr(i, i + delim.len) == delim
				if is_delim {
					was_last := nth > 0 && res.len == nth - 1
					if was_last {
						break
					}
					val := s.substr(start, i)
					res << val
					start = i + delim.len
					i = start
				} else {
					i++
				}
			}
			// Then the remaining right part of the string
			if nth < 1 || res.len < nth {
				res << s[start..]
			}
			return res
		}
	}
}
2020-12-12 13:10:29 +03:00
// split_into_lines splits the string by newline characters.
2021-05-11 18:57:32 +03:00
// newlines are stripped.
// Both `\n` and `\r\n` newline endings are supported.
// An empty string gives an empty array. Text after the last newline is
// returned as a final element, only when it is not empty.
[ direct_array_access ]
2019-06-22 21:20:28 +03:00
pub fn ( s string ) split_into_lines ( ) [ ] string {
2020-04-26 10:17:13 +03:00
mut res := [ ] string { }
2019-06-22 21:20:28 +03:00
if s . len == 0 {
return res
}
// `start` is the index of the first byte of the line that is being collected
mut start := 0
2021-05-11 18:57:32 +03:00
mut end := 0
2019-06-22 21:20:28 +03:00
for i := 0 ; i < s . len ; i ++ {
2021-05-11 18:57:32 +03:00
// 10 is `\n`
if s [ i ] == 10 {
// 13 is `\r`; when it precedes the `\n`, it is excluded from the line too
end = if i > 0 && s [ i - 1 ] == 13 { i - 1 } else { i }
// NOTE(review): the literal for the `start == end` case reads `' '` in this
// copy of the file; an empty line is presumably meant to be `''` - confirm.
res << if start == end { ' ' } else { s [ start .. end ] }
2019-06-22 21:20:28 +03:00
start = i + 1
}
}
2021-05-11 18:57:32 +03:00
// the last line, when the string does not end with a newline
if start < s . len {
res << s [ start .. ]
}
2019-06-22 21:20:28 +03:00
return res
}
2019-10-27 09:36:04 +03:00
// substr2 is used internally for slices like `[2..4]`.
// When `end_max` is true, `_end` is ignored, and the slice runs up to the end of the string.
fn (s string) substr2(start int, _end int, end_max bool) string {
	if end_max {
		return s.substr(start, s.len)
	}
	return s.substr(start, _end)
}
2020-12-12 13:10:29 +03:00
// substr returns the string between index positions `start` and `end`.
// `start` is inclusive, `end` is exclusive. The result is a new copy of those bytes.
// Unless the program is compiled with the `no_bounds_checking` flag, substr panics
// when `start > end`, or when either index is negative or larger than `s.len`.
// Example: assert 'ABCD'.substr(1,3) == 'BC'
2021-10-29 15:49:30 +03:00
[ direct_array_access ]
2020-10-15 13:32:28 +03:00
pub fn ( s string ) substr ( start int , end int ) string {
2020-12-03 18:02:48 +03:00
$ if ! no_bounds_checking ? {
2021-03-15 23:16:23 +03:00
if start > end || start > s . len || end > s . len || start < 0 || end < 0 {
panic ( ' s u b s t r ( $ start , $ end ) o u t o f b o u n d s ( l e n = $ s . len ) ' )
2020-02-16 18:13:45 +03:00
}
2019-06-22 21:20:28 +03:00
}
len := end - start
2020-12-11 07:12:18 +03:00
// the whole string was requested, so a plain clone is enough
if len == s . len {
return s . clone ( )
}
2019-12-19 23:52:45 +03:00
// one extra byte is allocated for the terminating 0
mut res := string {
2021-06-12 11:27:08 +03:00
str : unsafe { malloc_noscan ( len + 1 ) }
2020-05-18 22:38:06 +03:00
len : len
2019-07-22 17:51:33 +03:00
}
2020-12-03 18:02:48 +03:00
for i in 0 .. len {
2020-07-15 22:56:50 +03:00
unsafe {
res . str [ i ] = s . str [ start + i ]
}
}
unsafe {
2021-04-13 11:29:33 +03:00
res . str [ len ] = 0
2019-07-22 17:51:33 +03:00
}
2019-08-06 06:54:47 +03:00
return res
2019-06-22 21:20:28 +03:00
}
2022-01-17 13:03:10 +03:00
// substr_with_check is the version of `substr()` that is used in `a[start..end] or {`
// It returns an error (instead of panicking), when the index is out of range,
// i.e. when `start > end`, or when either index is negative or larger than `s.len`.
// The range check here is always done; it is not wrapped in a `no_bounds_checking` guard.
[ direct_array_access ]
pub fn ( s string ) substr_with_check ( start int , end int ) ? string {
if start > end || start > s . len || end > s . len || start < 0 || end < 0 {
return error ( ' s u b s t r ( $ start , $ end ) o u t o f b o u n d s ( l e n = $ s . len ) ' )
}
len := end - start
// the whole string was requested, so a plain clone is enough
if len == s . len {
return s . clone ( )
}
// one extra byte is allocated for the terminating 0
mut res := string {
str : unsafe { malloc_noscan ( len + 1 ) }
len : len
}
for i in 0 .. len {
unsafe {
res . str [ i ] = s . str [ start + i ]
}
}
unsafe {
res . str [ len ] = 0
}
return res
}
2021-12-22 16:34:02 +03:00
// substr_ni returns the part of the string between the index positions `_start` and `_end`,
// where negative indexes count backwards from the end of the string.
// Out of range indexes are clamped, so this function always returns a valid string
// (an empty one, when the resulting range is empty or inverted).
[direct_array_access]
pub fn (s string) substr_ni(_start int, _end int) string {
	// translate negative indexes to offsets from the end, never going below 0
	mut lo := if _start < 0 { s.len + _start } else { _start }
	if lo < 0 {
		lo = 0
	}
	mut hi := if _end < 0 { s.len + _end } else { _end }
	if hi < 0 {
		hi = 0
	}
	if hi > s.len {
		hi = s.len
	}
	// an inverted or fully out of range request yields an empty string
	count := if lo > s.len || hi < lo { 0 } else { hi - lo }
	mut piece := string{
		str: unsafe { malloc_noscan(count + 1) }
		len: count
	}
	unsafe {
		for k in 0 .. count {
			piece.str[k] = s.str[lo + k]
		}
		piece.str[count] = 0
	}
	return piece
}
2021-01-15 04:26:06 +03:00
// index_ returns the position of the first occurrence of `p` in the string.
// It returns `-1` when `p` is empty, longer than the string, or can not be found.
// Patterns longer than 2 bytes are searched with `index_kmp`.
[direct_array_access]
fn (s string) index_(p string) int {
	if p.len > s.len || p.len == 0 {
		return -1
	}
	if p.len > 2 {
		return s.index_kmp(p)
	}
	// Only try the start positions where the whole of `p` still fits inside `s`.
	// Going further would make the comparison below read `s.str[s.len]`, and a
	// pattern ending with a 0 byte could then falsely match the terminator.
	last_start := s.len - p.len
	for i := 0; i <= last_start; i++ {
		mut j := 0
		for j < p.len && unsafe { s.str[i + j] == p.str[j] } {
			j++
		}
		if j == p.len {
			return i
		}
	}
	return -1
}
2020-12-12 13:10:29 +03:00
// index returns the position of the first occurrence of `p` in the string.
// It will return `none` if `p` can't be found.
pub fn (s string) index(p string) ?int {
	pos := s.index_(p)
	if pos != -1 {
		return pos
	}
	return none
}
2020-12-12 13:10:29 +03:00
// index_kmp does a KMP (Knuth-Morris-Pratt) search for `p` in the string.
// It returns the position of the first match, or `-1` when there is none.
[direct_array_access; manualfree]
fn (s string) index_kmp(p string) int {
	if s.len < p.len {
		return -1
	}
	// fallback[k] is the number of already matched bytes, that can be kept
	// when a mismatch happens right after the position `k` of the pattern
	mut fallback := []int{len: p.len}
	defer {
		unsafe { fallback.free() }
	}
	mut matched := 0
	for k := 1; k < p.len; k++ {
		for unsafe { p.str[matched] != p.str[k] } && matched > 0 {
			matched = fallback[matched - 1]
		}
		if unsafe { p.str[matched] == p.str[k] } {
			matched++
		}
		fallback[k] = matched
	}
	// scan the string once, reusing the table on every mismatch
	matched = 0
	for k in 0 .. s.len {
		for unsafe { p.str[matched] != s.str[k] } && matched > 0 {
			matched = fallback[matched - 1]
		}
		if unsafe { p.str[matched] == s.str[k] } {
			matched++
		}
		if matched == p.len {
			return k - p.len + 1
		}
	}
	return -1
}
2020-12-12 13:10:29 +03:00
// index_any returns the position of the first byte of the string, that is also present in `chars`.
// It returns `-1` when none of the bytes in `chars` occurs in the string.
pub fn (s string) index_any(chars string) int {
	for pos, current in s {
		for wanted in chars {
			if wanted != current {
				continue
			}
			return pos
		}
	}
	return -1
}
2020-12-12 13:10:29 +03:00
// last_index_ returns the position of the last occurrence of `p` in the string.
// It returns `-1` when `p` is empty, longer than the string, or can not be found.
[direct_array_access]
fn (s string) last_index_(p string) int {
	if p.len == 0 || p.len > s.len {
		return -1
	}
	// walk the candidate start positions from right to left
	for pos := s.len - p.len; pos >= 0; pos-- {
		mut same := 0
		for same < p.len && unsafe { s.str[pos + same] == p.str[same] } {
			same++
		}
		if same == p.len {
			return pos
		}
	}
	return -1
}
// last_index returns the position of the last occurrence of `p` in the string.
// It returns `none` when `p` can not be found.
pub fn (s string) last_index(p string) ?int {
	pos := s.last_index_(p)
	if pos != -1 {
		return pos
	}
	return none
}
2020-12-12 13:10:29 +03:00
// index_after returns the position of the first occurrence of `p` in the string,
// that begins at or after the index `start`. A negative `start` is treated as 0.
// It returns `-1` when `p` is longer than the string, when `start` is at or past
// the end of the string, or when there is no such occurrence.
[direct_array_access]
pub fn (s string) index_after(p string, start int) int {
	if p.len > s.len {
		return -1
	}
	if start >= s.len {
		return -1
	}
	strt := if start < 0 { 0 } else { start }
	// Only try the start positions where the whole of `p` still fits inside `s`.
	// Going further would make the comparison below read at or past `s.str[s.len]`,
	// and a pattern containing 0 bytes could then falsely match beyond the string.
	last_start := s.len - p.len
	for i := strt; i <= last_start; i++ {
		mut j := 0
		for j < p.len && unsafe { s.str[i + j] == p.str[j] } {
			j++
		}
		if j == p.len {
			return i
		}
	}
	return -1
}
2020-12-12 13:10:29 +03:00
// index_u8 returns the index of the first byte equal to `c` in the string.
// index_u8 returns -1 if the byte can not be found.
[direct_array_access]
pub fn (s string) index_u8(c u8) int {
	for pos := 0; pos < s.len; pos++ {
		if unsafe { s.str[pos] != c } {
			continue
		}
		return pos
	}
	return -1
}
2020-12-12 13:10:29 +03:00
// last_index_u8 returns the index of the last occurrence of the byte `c` in the string.
// last_index_u8 returns -1 if the byte is not found.
[direct_array_access]
pub fn (s string) last_index_u8(c u8) int {
	mut pos := s.len - 1
	// step left until the byte is found; `pos` ends at -1 when there is no match
	for pos >= 0 && unsafe { s.str[pos] != c } {
		pos--
	}
	return pos
}
2020-12-12 13:10:29 +03:00
// count returns the number of non overlapping occurrences of `substr` in the string.
// count returns 0 if `substr` is empty, or if no `substr` could be found.
[direct_array_access]
pub fn (s string) count(substr string) int {
	if s.len == 0 || substr.len == 0 || substr.len > s.len {
		return 0
	}
	mut n := 0
	if substr.len == 1 {
		// fast path: count the matching bytes directly
		target := substr[0]
		for letter in s {
			if letter == target {
				n++
			}
		}
		return n
	}
	mut i := s.index_after(substr, 0)
	for i != -1 {
		n++
		// continue right after the match, so that overlapping matches are not counted
		i = s.index_after(substr, i + substr.len)
	}
	return n
}
2020-12-12 13:10:29 +03:00
// contains returns `true` if the string contains `substr`.
// An empty `substr` is considered to be contained in every string.
// See also: [`string.index`](#string.index)
pub fn (s string) contains(substr string) bool {
	return substr.len == 0 || s.index_(substr) != -1
}
2020-12-12 13:10:29 +03:00
// contains_any returns `true` if the string contains any of the bytes in `chars`.
// It returns `false` when `chars` is empty.
pub fn (s string) contains_any(chars string) bool {
	for c in chars {
		// a direct byte search gives the same answer as `s.contains(c.ascii_str())`,
		// without allocating a temporary 1 byte string for every `c`
		if s.index_u8(c) != -1 {
			return true
		}
	}
	return false
}
2022-06-23 01:41:42 +03:00
// contains_only returns `true`, if every byte of the string is one of the bytes in `chars`.
// It returns `false` when `chars` is empty.
pub fn (s string) contains_only(chars string) bool {
	if chars.len == 0 {
		return false
	}
	for ch in s {
		mut allowed := false
		for k := 0; k < chars.len; k++ {
			if unsafe { chars.str[k] } == ch {
				allowed = true
				break
			}
		}
		if !allowed {
			return false
		}
	}
	return true
}
2020-12-12 13:10:29 +03:00
// contains_any_substr returns `true` if the string contains any of the strings in `substrs`.
// Note: an empty `substrs` array yields `true` too.
pub fn (s string) contains_any_substr(substrs []string) bool {
	for candidate in substrs {
		if s.contains(candidate) {
			return true
		}
	}
	// nothing matched; that only counts as success, when there was nothing to look for
	return substrs.len == 0
}
2020-12-12 13:10:29 +03:00
// starts_with returns `true` if the string starts with `p`.
// An empty `p` is a prefix of every string.
[direct_array_access]
pub fn (s string) starts_with(p string) bool {
	if s.len < p.len {
		return false
	}
	// count how many leading bytes agree
	mut same := 0
	for same < p.len && unsafe { s.str[same] == p.str[same] } {
		same++
	}
	return same == p.len
}
2020-12-12 13:10:29 +03:00
// ends_with returns `true` if the string ends with `p`.
// An empty `p` is a suffix of every string.
[direct_array_access]
pub fn (s string) ends_with(p string) bool {
	if s.len < p.len {
		return false
	}
	// `tail` is the index in `s`, where a suffix of length `p.len` begins
	tail := s.len - p.len
	mut same := 0
	for same < p.len && unsafe { p.str[same] == s.str[tail + same] } {
		same++
	}
	return same == p.len
}
2020-12-12 13:10:29 +03:00
// to_lower returns a copy of the string, with all uppercase letters converted to lowercase.
// TODO only works with ASCII
[direct_array_access]
pub fn (s string) to_lower() string {
	unsafe {
		mut buf := malloc_noscan(s.len + 1)
		for k in 0 .. s.len {
			ch := s.str[k]
			// ASCII lowercase letters are 32 positions after their uppercase forms
			buf[k] = if ch >= `A` && ch <= `Z` { ch + 32 } else { ch }
		}
		buf[s.len] = 0
		return tos(buf, s.len)
	}
}
2020-12-12 13:10:29 +03:00
// is_lower returns `true` if the string does not contain any uppercase ASCII letters.
// Example: assert 'hello developer'.is_lower() == true
[direct_array_access]
pub fn (s string) is_lower() bool {
	for ch in s {
		if ch >= `A` && ch <= `Z` {
			return false
		}
	}
	return true
}
2020-12-12 13:10:29 +03:00
// to_upper returns a copy of the string, with all lowercase ASCII letters converted to uppercase.
// Example: assert 'Hello V'.to_upper() == 'HELLO V'
[direct_array_access]
pub fn (s string) to_upper() string {
	unsafe {
		mut buf := malloc_noscan(s.len + 1)
		for k in 0 .. s.len {
			ch := s.str[k]
			// ASCII uppercase letters are 32 positions before their lowercase forms
			buf[k] = if ch >= `a` && ch <= `z` { ch - 32 } else { ch }
		}
		buf[s.len] = 0
		return tos(buf, s.len)
	}
}
2020-12-12 13:10:29 +03:00
// is_upper returns `true` if the string does not contain any lowercase ASCII letters.
// See also: [`byte.is_capital`](#byte.is_capital)
// Example: assert 'HELLO V'.is_upper() == true
[direct_array_access]
pub fn (s string) is_upper() bool {
	for ch in s {
		if ch >= `a` && ch <= `z` {
			return false
		}
	}
	return true
}
2020-12-12 13:10:29 +03:00
// capitalize returns the string with the first character capitalized.
// Only the first byte is changed (through `to_upper`); the rest of the string is copied as is.
// Example: assert 'hello'.capitalize() == 'Hello'
2021-05-06 00:31:25 +03:00
[ direct_array_access ]
2019-08-26 13:32:53 +03:00
pub fn ( s string ) capitalize ( ) string {
2020-02-20 13:33:38 +03:00
// NOTE(review): the literal returned for an empty input reads `' '` in this
// copy of the file; presumably the empty string `''` is meant - confirm.
if s . len == 0 {
return ' '
}
2021-03-22 17:45:29 +03:00
// uppercase the first byte on its own, as a 1 byte string
s0 := s [ 0 ]
letter := s0 . ascii_str ( )
uletter := letter . to_upper ( )
if s . len == 1 {
return uletter
}
// then glue the untouched remainder back on
srest := s [ 1 .. ]
res := uletter + srest
return res
2019-08-26 13:32:53 +03:00
}
2021-11-07 14:30:40 +03:00
// is_capital returns `true`, if the first character in the string `s`,
// is a capital ASCII letter, and none of the following characters is one.
// It returns `false` for an empty string.
// Example: assert 'Hello'.is_capital() == true
// Example: assert 'HelloWorld'.is_capital() == false
[direct_array_access]
pub fn (s string) is_capital() bool {
	if s.len == 0 {
		return false
	}
	if s[0] < `A` || s[0] > `Z` {
		return false
	}
	for k := 1; k < s.len; k++ {
		ch := s[k]
		if ch >= `A` && ch <= `Z` {
			return false
		}
	}
	return true
}
2021-11-07 14:30:40 +03:00
// starts_with_capital returns `true`, if the first character in the string `s`,
// is a capital ASCII letter, even if the rest are not.
// It returns `false` for an empty string.
// Example: assert 'Hello'.starts_with_capital() == true
// Example: assert 'Hello. World.'.starts_with_capital() == true
[direct_array_access]
pub fn (s string) starts_with_capital() bool {
	return s.len > 0 && s[0] >= `A` && s[0] <= `Z`
}
2020-12-12 13:10:29 +03:00
// title returns the string with each word capitalized.
// Words are the parts of the string, that are separated by a single space.
// Example: assert 'hello v developer'.title() == 'Hello V Developer'
pub fn (s string) title() string {
	mut capitalized := []string{}
	for word in s.split(' ') {
		capitalized << word.capitalize()
	}
	return capitalized.join(' ')
}
2021-11-07 14:30:40 +03:00
// is_title returns true if all words of the string are capitalized.
// Words are the parts of the string, that are separated by a single space,
// and each of them is checked with `is_capital`.
// Example: assert 'Hello V Developer'.is_title() == true
pub fn (s string) is_title() bool {
	for word in s.split(' ') {
		if word.is_capital() {
			continue
		}
		return false
	}
	return true
}
2020-12-12 13:10:29 +03:00
// find_between returns the string found between `start` string and `end` string.
// The search for `end` begins right after the first occurrence of `start`.
// When `end` can not be found there, everything after `start` is returned.
// Example: assert 'hey [man] how you doin'.find_between('[', ']') == 'man'
2020-10-15 13:32:28 +03:00
pub fn ( s string ) find_between ( start string , end string ) string {
2021-01-15 04:26:06 +03:00
start_pos := s . index_ ( start )
// NOTE(review): the literal returned when `start` is not found reads `' '` in
// this copy of the file; presumably the empty string `''` is meant - confirm.
if start_pos == - 1 {
return ' '
}
2019-06-22 21:20:28 +03:00
// First get everything to the right of 'start'
2021-01-22 12:26:07 +03:00
val := s [ start_pos + start . len .. ]
2021-01-15 04:26:06 +03:00
end_pos := val . index_ ( end )
if end_pos == - 1 {
return val
}
2021-01-22 12:26:07 +03:00
// Then keep only the part before 'end'
return val [ .. end_pos ]
2019-06-22 21:20:28 +03:00
}
2020-12-12 13:10:29 +03:00
// trim_space strips any of ` `, `\n`, `\t`, `\v`, `\f`, `\r` from the start and end of the string.
// It is a shorthand for calling `trim` with those characters as the cutset.
// Example: assert ' Hello V '.trim_space() == 'Hello V'
2019-06-22 21:20:28 +03:00
pub fn ( s string ) trim_space ( ) string {
2019-09-06 13:22:37 +03:00
// NOTE(review): the cutset literal below shows extra spaces between the escapes in
// this copy of the file; it should hold exactly the characters listed above - confirm.
return s . trim ( ' \n \t \v \f \r ' )
2019-06-22 21:20:28 +03:00
}
2020-12-12 13:10:29 +03:00
// trim strips any of the characters given in `cutset` from the start and end of the string.
// When the string or the `cutset` is empty, an unchanged copy of the string is returned.
// Example: assert ' ffHello V ffff'.trim(' f') == 'Hello V'
pub fn (s string) trim(cutset string) string {
	if s.len >= 1 && cutset.len >= 1 {
		left, right := s.trim_indexes(cutset)
		return s.substr(left, right)
	}
	return s.clone()
}
// trim_indexes gets the new start and end indices of a string, when any of the characters
// given in `cutset` were stripped from the start and end of the string.
// Should be used as an input to `substr()`.
// If the string is empty, or contains only the characters in `cutset`, both values returned are zero.
// Example: left, right := '-hi-'.trim_indexes('-')
[direct_array_access]
pub fn (s string) trim_indexes(cutset string) (int, int) {
	mut pos_left := 0
	mut pos_right := s.len - 1
	mut cs_match := true
	// `pos_left <= pos_right` keeps both indexes inside the string. For an empty
	// string (pos_right == -1) the loop is not entered at all, so `s[-1]` is never read.
	for pos_left <= pos_right && cs_match {
		cs_match = false
		for cs in cutset {
			if s[pos_left] == cs {
				pos_left++
				cs_match = true
				break
			}
		}
		for cs in cutset {
			if s[pos_right] == cs {
				pos_right--
				cs_match = true
				break
			}
		}
		// the two ends crossed: everything was stripped
		if pos_left > pos_right {
			return 0, 0
		}
	}
	return pos_left, pos_right + 1
}
2020-12-12 13:10:29 +03:00
// trim_left strips any of the characters given in `cutset` from the left of the string.
// When the string or the `cutset` is empty, an unchanged copy of the string is returned.
// Example: assert 'd Hello V developer'.trim_left(' d') == 'Hello V developer'
[direct_array_access]
pub fn (s string) trim_left(cutset string) string {
	if s.len < 1 || cutset.len < 1 {
		return s.clone()
	}
	mut first_kept := 0
	scan: for first_kept < s.len {
		for cs in cutset {
			if s[first_kept] == cs {
				// this byte is stripped, look at the next one
				first_kept++
				continue scan
			}
		}
		// the current byte is not in `cutset`, so the result starts here
		break
	}
	return s[first_kept..]
}
2020-12-12 13:10:29 +03:00
// trim_right strips any of the characters given in `cutset` from the right of the string.
// When the string or the `cutset` is empty, an unchanged copy of the string is returned.
// Example: assert ' Hello V d'.trim_right(' d') == ' Hello V'
2021-05-06 00:31:25 +03:00
[ direct_array_access ]
2019-06-27 14:14:59 +03:00
pub fn ( s string ) trim_right ( cutset string ) string {
2019-09-06 13:22:37 +03:00
if s . len < 1 || cutset . len < 1 {
2021-03-18 23:22:43 +03:00
return s . clone ( )
2019-08-17 22:19:37 +03:00
}
2019-08-27 07:53:56 +03:00
// `pos` walks from the last byte towards the start, while the bytes are in `cutset`
mut pos := s . len - 1
2021-03-18 20:52:33 +03:00
for pos >= 0 {
mut found := false
// NOTE(review): unlike `trim_left`, this scan does not `break` after a match;
// the result is the same, the rest of `cutset` is just checked needlessly.
for cs in cutset {
if s [ pos ] == cs {
found = true
}
}
if ! found {
break
}
2019-08-17 22:19:37 +03:00
pos --
}
2021-03-22 17:45:29 +03:00
// every byte was stripped
// NOTE(review): the literal below reads `' '` in this copy of the file;
// presumably the empty string `''` is meant - confirm.
if pos < 0 {
return ' '
}
return s [ .. pos + 1 ]
2019-06-22 21:20:28 +03:00
}
2022-01-05 13:49:22 +03:00
// trim_string_left strips `str` from the start of the string.
// When the string does not start with `str`, an unchanged copy of it is returned.
// Example: assert 'WorldHello V'.trim_string_left('World') == 'Hello V'
pub fn (s string) trim_string_left(str string) string {
	if !s.starts_with(str) {
		return s.clone()
	}
	return s[str.len..]
}
2022-01-05 13:49:22 +03:00
// trim_string_right strips `str` from the end of the string.
// When the string does not end with `str`, an unchanged copy of it is returned.
// Example: assert 'Hello VWorld'.trim_string_right('World') == 'Hello V'
pub fn (s string) trim_string_right(str string) string {
	if !s.ends_with(str) {
		return s.clone()
	}
	return s[..s.len - str.len]
}
2022-01-05 13:49:22 +03:00
// trim_prefix strips `str` from the start of the string.
// It is deprecated, and only forwards to `trim_string_left`, which should be used instead.
// Example: assert 'WorldHello V'.trim_prefix('World') == 'Hello V'
[ deprecated : ' u s e s . t r i m _ s t r i n g _ l e f t ( x ) i n s t e a d ' ]
[ deprecated_after : ' 2 0 2 2 - 0 1 - 1 9 ' ]
pub fn ( s string ) trim_prefix ( str string ) string {
return s . trim_string_left ( str )
}
// trim_suffix strips `str` from the end of the string.
// It is deprecated, and only forwards to `trim_string_right`, which should be used instead.
// Example: assert 'Hello VWorld'.trim_suffix('World') == 'Hello V'
[ deprecated : ' u s e s . t r i m _ s t r i n g _ r i g h t ( x ) i n s t e a d ' ]
[ deprecated_after : ' 2 0 2 2 - 0 1 - 1 9 ' ]
pub fn ( s string ) trim_suffix ( str string ) string {
return s . trim_string_right ( str )
}
2020-12-12 13:10:29 +03:00
// compare_strings returns `-1` if `a < b`, `1` if `a > b` else `0`.
// It has the signature expected by `sort_with_compare`.
pub fn compare_strings(a &string, b &string) int {
	return if a < b {
		-1
	} else if a > b {
		1
	} else {
		0
	}
}
2020-12-12 13:10:29 +03:00
// compare_strings_by_len returns `-1` if `a.len < b.len`, `1` if `a.len > b.len` else `0`.
// The contents of the strings are not looked at.
fn compare_strings_by_len(a &string, b &string) int {
	if a.len == b.len {
		return 0
	}
	return if a.len < b.len { -1 } else { 1 }
}
2020-12-12 13:10:29 +03:00
// compare_lower_strings returns the same as compare_strings, but it converts
// both `a` and `b` to lower case (with `to_lower`) before comparing them.
fn compare_lower_strings(a &string, b &string) int {
	lowered_a := a.to_lower()
	lowered_b := b.to_lower()
	return compare_strings(&lowered_a, &lowered_b)
}
2020-12-12 13:10:29 +03:00
// sort_ignore_case sorts the string array in place, using case insensitive comparing
// (the elements are compared through `compare_lower_strings`).
pub fn (mut s []string) sort_ignore_case() {
	s.sort_with_compare(compare_lower_strings)
}
2020-12-12 13:10:29 +03:00
// sort_by_len sorts the string array in place, by each string's `.len` length
// (the elements are compared through `compare_strings_by_len`).
pub fn (mut s []string) sort_by_len() {
	s.sort_with_compare(compare_strings_by_len)
}
2021-03-18 23:22:43 +03:00
// str returns a copy of the string (made with `clone`).
pub fn (s string) str() string {
	duplicate := s.clone()
	return duplicate
}
2020-12-12 13:10:29 +03:00
// at returns the byte at index `idx`.
// Unless the program is compiled with the `no_bounds_checking` flag,
// at panics when `idx` is negative, or not smaller than `s.len`.
2022-04-15 14:58:56 +03:00
// Example: assert 'ABC'.at(1) == u8(`B`)
2019-06-22 21:20:28 +03:00
fn ( s string ) at ( idx int ) byte {
2020-12-03 18:02:48 +03:00
$ if ! no_bounds_checking ? {
2020-02-16 18:13:45 +03:00
if idx < 0 || idx >= s . len {
panic ( ' s t r i n g i n d e x o u t o f r a n g e : $ idx / $ s . len ' )
}
2019-06-22 21:20:28 +03:00
}
2020-07-15 22:56:50 +03:00
// the index was validated above (or validation was disabled on purpose)
unsafe {
return s . str [ idx ]
}
2019-06-22 21:20:28 +03:00
}
2021-07-05 21:00:30 +03:00
// at_with_check is the version of `at()` that is used in `a[i] or {`
// It returns an error (instead of panicking), when the index is out of range,
// i.e. when `idx` is negative, or not smaller than `s.len`.
// The range check here is always done; it is not wrapped in a `no_bounds_checking` guard.
2022-04-15 14:45:52 +03:00
fn ( s string ) at_with_check ( idx int ) ? u8 {
2021-07-05 21:00:30 +03:00
if idx < 0 || idx >= s . len {
return error ( ' s t r i n g i n d e x o u t o f r a n g e ' )
}
unsafe {
return s . str [ idx ]
}
}
2021-06-29 14:50:55 +03:00
// is_space returns `true` if the byte is a white space character.
// The following list is considered white space characters: ` `, `\t`, `\n`, `\v`, `\f`, `\r`, 0x85, 0xa0
// Example: assert u8(` `).is_space() == true
[inline]
pub fn (c u8) is_space() bool {
	// 9 .. 13 are `\t`, `\n`, `\v`, `\f` and `\r`
	if c >= 9 && c <= 13 {
		return true
	}
	// 32 is the plain space
	// 0x85 is NEXT LINE (NEL)
	// 0xa0 is NO-BREAK SPACE
	return c == 32 || c == 0x85 || c == 0xa0
}
2020-12-12 13:10:29 +03:00
// is_digit returns `true` if the byte is one of the ASCII digits `0` .. `9`, and `false` otherwise.
// Example: assert u8(`9`).is_digit() == true
[inline]
pub fn (c u8) is_digit() bool {
	return !(c < `0` || c > `9`)
}
2020-12-12 13:10:29 +03:00
// is_hex_digit returns `true` if the byte is either in range 0-9, a-f or A-F and `false` otherwise.
// Example: assert u8(`F`).is_hex_digit() == true
[inline]
pub fn (c u8) is_hex_digit() bool {
	if c >= `0` && c <= `9` {
		return true
	}
	if c >= `a` && c <= `f` {
		return true
	}
	return c >= `A` && c <= `F`
}
2020-12-12 13:10:29 +03:00
// is_oct_digit returns `true` if the byte is one of the ASCII digits `0` .. `7`, and `false` otherwise.
// Example: assert u8(`7`).is_oct_digit() == true
[inline]
pub fn (c u8) is_oct_digit() bool {
	return !(c < `0` || c > `7`)
}
2020-12-12 13:10:29 +03:00
// is_bin_digit returns `true` if the byte is a binary digit (`0` or `1`) and `false` otherwise.
// Example: assert u8(`0`).is_bin_digit() == true
[inline]
pub fn (c u8) is_bin_digit() bool {
	if c == `1` {
		return true
	}
	return c == `0`
}
2020-12-12 13:10:29 +03:00
// is_letter returns `true` if the byte is an ASCII letter (a-z or A-Z) and `false` otherwise.
// Example: assert u8(`V`).is_letter() == true
[inline]
pub fn (c u8) is_letter() bool {
	if c >= `A` && c <= `Z` {
		return true
	}
	return c >= `a` && c <= `z`
}
2021-09-19 16:22:28 +03:00
// is_alnum returns `true` if the byte is an ASCII letter or digit (a-z, A-Z, 0-9) and `false` otherwise.
// Example: assert u8(`V`).is_alnum() == true
[inline]
pub fn (c u8) is_alnum() bool {
	if c >= `0` && c <= `9` {
		return true
	}
	if c >= `A` && c <= `Z` {
		return true
	}
	return c >= `a` && c <= `z`
}
2020-12-12 13:10:29 +03:00
// free allows for manually freeing the memory occupied by the string
// It does nothing when compiled with `prealloc`, when `s.is_lit` is 1, or when `s.str` is 0.
// After the buffer is freed, `s.is_lit` is set to a marker value, so that a second
// call on the same string is detected and reported, instead of freeing again.
2021-05-23 12:37:23 +03:00
[ manualfree ; unsafe ]
2020-05-06 19:03:44 +03:00
pub fn ( s & string ) free ( ) {
2020-07-11 14:22:16 +03:00
$ if prealloc {
return
}
2020-06-30 18:28:28 +03:00
// -98761234 is the marker, that is stored at the end of a previous free() call
if s . is_lit == - 98761234 {
2022-04-15 14:58:56 +03:00
double_free_msg := unsafe { & u8 ( c ' d o u b l e s t r i n g . f r e e ( ) d e t e c t e d \n ' ) }
2021-06-23 14:29:38 +03:00
double_free_msg_len := unsafe { vstrlen ( double_free_msg ) }
2021-04-14 08:50:50 +03:00
$ if freestanding {
2021-06-23 14:29:38 +03:00
bare_eprint ( double_free_msg , u64 ( double_free_msg_len ) )
2021-04-14 08:50:50 +03:00
} $ else {
2021-06-23 14:29:38 +03:00
// NOTE(review): this writes to file descriptor 1 (presumably stdout), while the
// freestanding branch uses an eprint helper - confirm that 1 is intended.
_write_buf_to_fd ( 1 , double_free_msg , double_free_msg_len )
2021-04-14 08:50:50 +03:00
}
2020-06-30 18:28:28 +03:00
return
}
2021-06-10 15:21:57 +03:00
// presumably `is_lit == 1` marks string literals, whose buffers must not be freed - confirm
if s . is_lit == 1 || s . str == 0 {
2020-06-12 20:20:51 +03:00
return
}
2021-02-14 21:31:42 +03:00
unsafe {
2021-12-16 16:59:46 +03:00
// C.printf(c's: %x %s\n', s.str, s.str)
2021-02-14 21:31:42 +03:00
free ( s . str )
}
2020-06-30 18:28:28 +03:00
// remember that this string was already freed
s . is_lit = - 98761234
2019-06-22 21:20:28 +03:00
}
2021-06-29 14:40:37 +03:00
// before returns the part of the string that precedes `sub`.
// When `sub` is not present, the full input string is returned (as a clone).
// Example: assert '23:34:45.234'.before('.') == '23:34:45'
// Example: assert 'abcd'.before('.') == 'abcd'
// TODO: deprecate and remove either .before or .all_before
pub fn (s string) before(sub string) string {
	idx := s.index_(sub)
	if idx != -1 {
		return s[..idx]
	}
	return s.clone()
}
2021-06-29 14:40:37 +03:00
// all_before returns everything in the string that comes before `sub`.
// When `sub` is not present, the full input string is returned (as a clone).
// Example: assert '23:34:45.234'.all_before('.') == '23:34:45'
// Example: assert 'abcd'.all_before('.') == 'abcd'
pub fn (s string) all_before(sub string) string {
	// TODO remove dup method
	cut := s.index_(sub)
	return if cut == -1 { s.clone() } else { s[..cut] }
}
2021-06-29 14:40:37 +03:00
// all_before_last returns everything in the string that comes before the last occurrence of `sub`.
// When `sub` is not present, the full input string is returned (as a clone).
// Example: assert '23:34:45.234'.all_before_last(':') == '23:34'
// Example: assert 'abcd'.all_before_last('.') == 'abcd'
pub fn (s string) all_before_last(sub string) string {
	last := s.last_index_(sub)
	if last != -1 {
		return s[..last]
	}
	return s.clone()
}
2021-06-29 14:40:37 +03:00
// all_after returns everything in the string that follows `sub`.
// When `sub` is not present, the full input string is returned (as a clone).
// Example: assert '23:34:45.234'.all_after('.') == '234'
// Example: assert 'abcd'.all_after('z') == 'abcd'
pub fn (s string) all_after(sub string) string {
	found := s.index_(sub)
	if found == -1 {
		return s.clone()
	}
	// skip over the matched substring itself
	start := found + sub.len
	return s[start..]
}
2021-06-29 14:40:37 +03:00
// all_after_last returns everything in the string that follows the last occurrence of `sub`.
// When `sub` is not present, the full input string is returned (as a clone).
// Example: assert '23:34:45.234'.all_after_last(':') == '45.234'
// Example: assert 'abcd'.all_after_last('z') == 'abcd'
pub fn (s string) all_after_last(sub string) string {
	last := s.last_index_(sub)
	return if last == -1 { s.clone() } else { s[last + sub.len..] }
}
2022-07-30 14:29:41 +03:00
// all_after_first returns everything in the string that follows the first occurrence of `sub`.
// When `sub` is not present, the full input string is returned (as a clone).
// Example: assert '23:34:45.234'.all_after_first(':') == '34:45.234'
// Example: assert 'abcd'.all_after_first('z') == 'abcd'
pub fn (s string) all_after_first(sub string) string {
	first := s.index_(sub)
	if first != -1 {
		return s[first + sub.len..]
	}
	return s.clone()
}
2021-06-29 14:40:37 +03:00
// after returns everything in the string that follows the last occurrence of `sub`.
// It simply forwards to `all_after_last`, so when `sub` is not present,
// the full input string is returned.
// Example: assert '23:34:45.234'.after(':') == '45.234'
// Example: assert 'abcd'.after('z') == 'abcd'
// TODO: deprecate either .all_after_last or .after
pub fn (s string) after(sub string) string {
	tail := s.all_after_last(sub)
	return tail
}
2020-02-18 22:20:15 +03:00
2021-06-29 14:40:37 +03:00
// after_char returns everything in the string that follows the first occurrence of the byte `sub`.
// When the byte is not present, the full input string is returned (as a clone).
// Example: assert '23:34:45.234'.after_char(`:`) == '34:45.234'
// Example: assert 'abcd'.after_char(`:`) == 'abcd'
pub fn (s string) after_char(sub u8) string {
	for i in 0 .. s.len {
		if s[i] == sub {
			// the first hit decides the result; everything after it is returned
			return s[i + 1..]
		}
	}
	return s.clone()
}
2021-05-02 19:31:47 +03:00
// join concatenates all the strings of the array into a single new string,
// placing `sep` between each pair of neighbouring elements.
// An empty array produces an empty string.
// Example: assert ['Hello','V'].join(' ') == 'Hello V'
pub fn (a []string) join(sep string) string {
	if a.len == 0 {
		return ''
	}
	// The result holds every element, plus one separator per gap between elements.
	mut total := sep.len * (a.len - 1)
	for item in a {
		total += item.len
	}
	// One extra byte is reserved for the terminating 0.
	mut res := string{
		str: unsafe { malloc_noscan(total + 1) }
		len: total
	}
	mut offset := 0
	for i, item in a {
		unsafe {
			// every element except the first one is preceded by the separator
			if i > 0 {
				vmemcpy(voidptr(res.str + offset), sep.str, sep.len)
				offset += sep.len
			}
			vmemcpy(voidptr(res.str + offset), item.str, item.len)
			offset += item.len
		}
	}
	unsafe {
		res.str[total] = 0
	}
	return res
}
2020-12-12 13:10:29 +03:00
// join_lines joins a string array into a single string, using a `\n` newline as the delimiter.
// Example: assert ['a', 'b'].join_lines() == 'a\nb'
pub fn (s []string) join_lines() string {
	newline := '\n'
	return s.join(newline)
}
2020-12-12 13:10:29 +03:00
// reverse returns a new string, with the bytes of the original in reverse order.
// Strings of length 0 or 1 are returned as a clone.
// Example: assert 'Hello V'.reverse() == 'V olleH'
[direct_array_access]
pub fn (s string) reverse() string {
	if s.len in [0, 1] {
		return s.clone()
	}
	last := s.len - 1
	// one extra byte for the terminating 0
	mut res := string{
		str: unsafe { malloc_noscan(s.len + 1) }
		len: s.len
	}
	for i in 0 .. s.len {
		unsafe {
			res.str[i] = s[last - i]
		}
	}
	unsafe {
		res.str[s.len] = 0
	}
	return res
}
2019-10-27 22:29:55 +03:00
// limit returns the leading part of the string, containing at most `max` runes.
// When the string has `max` runes or fewer, it is returned whole (as a clone).
// Example: assert 'hello'.limit(2) == 'he'
// Example: assert 'hi'.limit(10) == 'hi'
pub fn (s string) limit(max int) string {
	// the limit is applied to runes, not to bytes
	all_runes := s.runes()
	if max < all_runes.len {
		return all_runes[..max].string()
	}
	return s.clone()
}
2020-12-12 13:10:29 +03:00
// hash returns an integer hash of the string.
// Starting from 0, the running value is multiplied by 31 and the next byte is added,
// for each byte of the string in order. The value is accumulated in a `u32`,
// and then converted to `int`. The hash of an empty string is 0.
pub fn (s string) hash() int {
	mut acc := u32(0)
	for b in s {
		acc = acc * 31 + u32(b)
	}
	return int(acc)
}
2020-12-12 13:10:29 +03:00
// bytes returns a new byte array, holding a copy of the bytes of the string.
// An empty string produces an empty array.
pub fn (s string) bytes() []u8 {
	if s.len == 0 {
		return []
	}
	mut copied := []u8{len: s.len}
	unsafe {
		vmemcpy(copied.data, s.str, s.len)
	}
	return copied
}
2019-09-26 22:54:53 +03:00
2020-12-12 13:10:29 +03:00
// repeat returns a new string with `count` number of copies of the string it was called on.
// It panics when `count` is negative, returns an empty string when `count` is 0,
// and returns a clone of the string when `count` is 1.
// Example: assert 'ab'.repeat(3) == 'ababab'
[direct_array_access]
pub fn (s string) repeat(count int) string {
	if count < 0 {
		panic('string.repeat: count is negative: $count')
	} else if count == 0 {
		return ''
	} else if count == 1 {
		return s.clone()
	}
	new_len := s.len * count
	// one extra byte for the terminating 0
	mut ret := unsafe { malloc_noscan(new_len + 1) }
	for i in 0 .. count {
		// copy one whole repetition at a time, instead of byte by byte
		unsafe {
			vmemcpy(voidptr(ret + i * s.len), s.str, s.len)
		}
	}
	unsafe {
		ret[new_len] = 0
	}
	return unsafe { ret.vstring_with_len(new_len) }
}
2020-03-15 07:46:12 +03:00
2020-12-12 13:10:29 +03:00
// fields splits the string into words, and returns them as a string array.
// The words are separated by one or more of the bytes ` ` (32), `\t` (9) and `\n` (10).
// Leading and trailing separators do not produce empty words.
// Example: assert '\t\tv = v'.fields() == ['v', '=', 'v']
// Example: assert ' sss ssss'.fields() == ['sss', 'ssss']
pub fn (s string) fields() []string {
	mut res := []string{}
	// index of the first byte of the current word, or -1 while between words
	mut start := -1
	for i, c in s {
		if c in [32, 9, 10] {
			if start >= 0 {
				// a separator right after a word closes that word
				res << s[start..i]
				start = -1
			}
		} else if start < 0 {
			start = i
		}
	}
	if start >= 0 {
		// collect the remainder word at the end
		res << s[start..]
	}
	return res
}
2020-12-12 13:10:29 +03:00
// strip_margin allows multi-line strings to be formatted in a way, that removes
// the white-space before a delimiter on each continuation line. The delimiter is `|`.
// Use `strip_margin_custom`, if you need a different delimiter byte.
//
// Example:
// ```v
// st := 'Hello there,
//       | this is a string,
//       | Everything before the first | is removed'.strip_margin()
//
// assert st == 'Hello there,
//  this is a string,
//  Everything before the first | is removed'
// ```
pub fn (s string) strip_margin() string {
	default_delimiter := u8(`|`)
	return s.strip_margin_custom(default_delimiter)
}
2020-12-03 18:02:48 +03:00
2020-12-12 13:10:29 +03:00
// strip_margin_custom does the same as `strip_margin` but will use `del` as delimiter instead of `|`
// The text before the first line break is copied unchanged. After each line break,
// everything up to and including the next delimiter is dropped.
// If `del` is a white-space byte, a warning is printed and `|` is used instead.
2021-05-06 00:31:25 +03:00
[ direct_array_access ]
2022-04-15 14:45:52 +03:00
pub fn ( s string ) strip_margin_custom ( del u8 ) string {
2020-04-11 11:06:03 +03:00
mut sep := del
if sep . is_space ( ) {
2021-12-16 16:59:46 +03:00
println ( ' W a r n i n g : ` s t r i p _ m a r g i n ` c a n n o t u s e w h i t e - s p a c e a s a d e l i m i t e r ' )
println ( ' D e f a u l t i n g t o ` | ` ' )
2020-04-11 11:06:03 +03:00
sep = ` | `
2020-03-15 07:46:12 +03:00
}
// don't know how much space the resulting string will be, but the max it
// can be is this big
2021-06-12 11:27:08 +03:00
mut ret := unsafe { malloc_noscan ( s . len + 1 ) }
2020-03-15 07:46:12 +03:00
// count is the number of bytes written to ret so far
mut count := 0
for i := 0 ; i < s . len ; i ++ {
2021-05-06 00:31:25 +03:00
// 10 is LF, 13 is CR: the line break itself is kept in the output
if s [ i ] in [ 10 , 13 ] {
2020-07-15 22:56:50 +03:00
unsafe {
ret [ count ] = s [ i ]
}
2020-03-16 17:46:09 +03:00
count ++
// CRLF
2021-05-06 00:31:25 +03:00
if s [ i ] == 13 && i < s . len - 1 && s [ i + 1 ] == 10 {
2020-07-15 22:56:50 +03:00
unsafe {
2020-12-03 18:02:48 +03:00
ret [ count ] = s [ i + 1 ]
2020-07-15 22:56:50 +03:00
}
2020-03-15 07:46:12 +03:00
count ++
2020-03-16 17:46:09 +03:00
i ++
2020-03-15 07:46:12 +03:00
}
// skip forward to the next delimiter (or to the end of the string);
// the `i++` of the outer loop then steps over the delimiter itself.
// Note: the scan only stops at a delimiter, so when a line has none,
// it continues into the following lines.
for s [ i ] != sep {
i ++
if i >= s . len {
break
}
}
} else {
2020-07-15 22:56:50 +03:00
// any other byte is copied to the output unchanged
unsafe {
ret [ count ] = s [ i ]
}
2020-03-15 07:46:12 +03:00
count ++
}
}
2020-07-15 22:56:50 +03:00
// terminate the buffer with 0, and wrap its first `count` bytes in a V string
unsafe {
ret [ count ] = 0
2020-08-10 19:05:26 +03:00
return ret . vstring_with_len ( count )
2020-07-15 22:56:50 +03:00
}
2020-03-15 07:46:12 +03:00
}
2021-12-20 15:15:51 +03:00
// match_glob matches the string, with a Unix shell-style wildcard pattern.
// Note: wildcard patterns are NOT the same as regular expressions.
// They are much simpler, and do not allow backtracking, captures, etc.
// The special characters used in shell-style wildcards are:
// `*` - matches everything
// `?` - matches any single character
// `[seq]` - matches any of the characters in the sequence
// `[^seq]` - matches any character that is NOT in the sequence
// Any other character in `pattern`, is matched 1:1 to the corresponding
// character in `name`, including / and \.
// You can wrap the meta-characters in brackets too, i.e. `[?]` matches `?`
// in the string, and `[*]` matches `*` in the string.
// A `[` without a closing `]` is treated as a sequence, that extends to the
// end of the pattern.
// Example: assert 'ABCD'.match_glob('AB*')
// Example: assert 'ABCD'.match_glob('*D')
// Example: assert 'ABCD'.match_glob('*B*')
// Example: assert !'ABCD'.match_glob('AB')
// Example: assert 'ab'.match_glob('*[^a]')
[direct_array_access]
pub fn (name string) match_glob(pattern string) bool {
	// Initial port based on https://research.swtch.com/glob.go
	// See also https://research.swtch.com/glob
	mut px := 0
	mut nx := 0
	// restart position, recorded at the last `*` that was seen
	mut next_px := 0
	mut next_nx := 0
	plen := pattern.len
	nlen := name.len
	for px < plen || nx < nlen {
		if px < plen {
			c := pattern[px]
			match c {
				`?` {
					// single-character wildcard
					if nx < nlen {
						px++
						nx++
						continue
					}
				}
				`*` {
					// zero-or-more-character wildcard
					// Try to match at nx.
					// If that doesn't work out, restart at nx+1 next.
					next_px = px
					next_nx = nx + 1
					px++
					continue
				}
				`[` {
					// a character sequence always consumes exactly one character of `name`
					if nx < nlen {
						wanted_c := name[nx]
						mut inner_idx := px + 1
						mut is_inverted := false
						if inner_idx < plen && pattern[inner_idx] == `^` {
							is_inverted = true
							inner_idx++
						}
						// Scan the whole sequence, so that inner_idx ends up on the
						// closing `]`, or on plen, when the sequence is unterminated.
						mut inner_match := false
						for inner_idx < plen && pattern[inner_idx] != `]` {
							if pattern[inner_idx] == wanted_c {
								inner_match = true
							}
							inner_idx++
						}
						// `[seq]` needs a hit, `[^seq]` needs a miss
						if inner_match != is_inverted {
							px = inner_idx + 1
							nx++
							continue
						}
					}
					// Otherwise this is a mismatch, that is handled by the
					// restart logic below, exactly like for ordinary characters.
				}
				else {
					// an ordinary character
					if nx < nlen && name[nx] == c {
						px++
						nx++
						continue
					}
				}
			}
		}
		if 0 < next_nx && next_nx <= nlen {
			// A mismatch, try restarting:
			px = next_px
			nx = next_nx
			continue
		}
		return false
	}
	// Matched all of `pattern` to all of `name`
	return true
}
2022-05-17 08:01:03 +03:00
// is_ascii returns true, when every byte of the string is in the range [` `..`~`]
// of the US-ASCII set. An empty string is considered to be ASCII.
pub fn (s string) is_ascii() bool {
	for c in s {
		if c < u8(` `) || c > u8(`~`) {
			return false
		}
	}
	return true
}