1
0
mirror of https://github.com/vlang/v.git synced 2023-08-10 21:13:21 +03:00
v/vlib/builtin/string.v

881 lines
15 KiB
V
Raw Normal View History

2019-06-23 05:21:30 +03:00
// Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
2019-06-22 21:20:28 +03:00
module builtin
// string is the built-in byte string type: a pointer to the data plus its length.
struct string {
//mut:
2019-08-17 22:19:37 +03:00
//hash_cache int
2019-06-22 21:20:28 +03:00
pub:
2019-06-23 01:24:16 +03:00
// Pointer to the first byte of the string data.
str byteptr
2019-06-22 21:20:28 +03:00
// Number of bytes in the string (a trailing NUL written by the constructors is not counted).
len int
}
// ustring is a rune-indexed view of a string, built by `string.ustring()`.
struct ustring {
pub:
// The underlying byte string.
s string
// Byte offset in `s` at which each rune starts.
runes []int
// Number of runes.
len int
}
// Declaration of C's strlen. For C strings only: V strings carry their own `len`.
fn C.strlen(s byteptr) int
2019-08-17 22:19:37 +03:00
// todo is an empty placeholder function; it does nothing.
fn todo() { }
2019-08-17 22:19:37 +03:00
// tos wraps an existing buffer `s` of `len` bytes in a V string.
// The bytes are shared with the caller, not copied.
// Panics when `s` is nil.
pub fn tos(s byteptr, len int) string {
	// A nil pointer here means the caller is broken.
	if isnil(s) {
		panic('tos(): nil string')
	}
	wrapped := string {
		len: len
		str: s
	}
	return wrapped
}
// tos_clone converts the NUL-terminated C string `s` into a V string
// that holds its own freshly allocated copy of the bytes.
// Panics when `s` is nil.
pub fn tos_clone(s byteptr) string {
	if isnil(s) {
		// Report this function's own name so the panic can be traced to the right call.
		panic('tos_clone: nil string')
	}
	return tos2(s).clone()
}
2019-08-17 22:19:37 +03:00
// tos2 is `tos` for NUL-terminated buffers: the length is measured with C.strlen.
// It is what `string(bytes)` casts call. Internal use only.
// Panics when `s` is nil.
fn tos2(s byteptr) string {
	if isnil(s) {
		panic('tos2: nil string')
	}
	return tos(s, C.strlen(s))
}
// clone returns a copy of `a` backed by newly allocated memory.
// The copy is followed by a NUL byte.
pub fn (a string) clone() string {
	n := a.len
	mut dup := string {
		str: malloc(n + 1)
		len: n
	}
	mut i := 0
	for i < n {
		dup[i] = a[i]
		i++
	}
	dup[n] = `\0`
	return dup
}
2019-08-17 22:19:37 +03:00
/*
pub fn (s string) cstr() byteptr {
2019-06-22 21:20:28 +03:00
clone := s.clone()
return clone.str
}
2019-08-17 22:19:37 +03:00
*/
2019-06-22 21:20:28 +03:00
// replace returns a copy of `s` in which every non-overlapping occurrence of `rep`
// is replaced by `with`. `s` itself is returned when `s` or `rep` is empty, or when
// `rep` does not occur in `s`.
pub fn (s string) replace(rep, with string) string {
if s.len == 0 || rep.len == 0 {
2019-06-28 18:17:54 +03:00
return s
2019-06-22 21:20:28 +03:00
}
// println('"$s" replace "$rep" with "$with" rep.len=$rep.len')
// TODO PERF Allocating ints is expensive. Should be a stack array
// Get locations of all reps within this string
mut idxs := []int{}
2019-06-29 18:29:29 +03:00
// `rem` is the not-yet-searched tail of `s`; `rstart` is the offset of `rem` inside `s`
mut rem := s
mut rstart := 0
for {
mut i := rem.index(rep)
if i < 0 {break}
idxs << rstart + i
// Resume the search right after this match, so matches never overlap
i += rep.len
rstart += i
rem = rem.substr(i, rem.len)
2019-06-22 21:20:28 +03:00
}
// Don't change the string if there's nothing to replace
if idxs.len == 0 {
return s
}
// Now we know the number of replacements we need to do and we can calc the len of the new string
new_len := s.len + idxs.len * (with.len - rep.len)
mut b := malloc(new_len + 1)// one extra byte for the trailing NUL terminator
// Fill the new string
// `cur_idx` is the next match position in `s`; `b_i` is the write position in `b`
mut idx_pos := 0
mut cur_idx := idxs[idx_pos]
mut b_i := 0
2019-06-22 21:20:28 +03:00
for i := 0; i < s.len; i++ {
// Reached the location of rep, replace it with "with"
if i == cur_idx {
for j := 0; j < with.len; j++ {
b[b_i] = with[j]
b_i++
}
// Skip the length of rep, since we just replaced it with "with"
// (minus one, because the loop header increments `i` again)
i += rep.len - 1
// Go to the next index
idx_pos++
if idx_pos < idxs.len {
cur_idx = idxs[idx_pos]
}
}
// Rep doesn't start here, just copy
else {
b[b_i] = s[i]
b_i++
}
}
b[new_len] = `\0`
return tos(b, new_len)
}
// int converts the string to an int by passing `s.str` to C's atoi.
// NOTE(review): atoi reads up to a NUL byte, so this presumably relies on `s.str` being NUL-terminated — confirm.
pub fn (s string) int() int {
2019-06-25 15:56:34 +03:00
return C.atoi(s.str)
}
// i64 converts the string to an i64 by passing `s.str` to C's atoll.
// NOTE(review): presumably relies on `s.str` being NUL-terminated — confirm.
pub fn (s string) i64() i64 {
return C.atoll(s.str)
}
// f32 converts the string to an f32 by passing `s.str` to C's atof.
// NOTE(review): atof yields a C double; the narrowing to f32 presumably happens on return — confirm.
pub fn (s string) f32() f32 {
2019-06-26 20:03:35 +03:00
return C.atof(s.str)
}
// f64 converts the string to an f64 by passing `s.str` to C's atof.
// NOTE(review): presumably relies on `s.str` being NUL-terminated — confirm.
pub fn (s string) f64() f64 {
return C.atof(s.str)
}
// u32 converts the string to a u32 by passing `s.str` to C's strtoul.
// The end pointer argument is 0 (unused) and the base argument is 0,
// which in C lets strtoul pick the base from the text's prefix.
pub fn (s string) u32() u32 {
return C.strtoul(s.str, 0, 0)
}
// u64 converts the string to a u64 by passing `s.str` to C's strtoull.
// The end pointer argument is 0 (unused) and the base argument is 0,
// which in C lets strtoull pick the base from the text's prefix.
pub fn (s string) u64() u64 {
return C.strtoull(s.str, 0, 0)
//return C.atoll(s.str) // temporary fix for tcc on windows.
}
2019-06-22 21:20:28 +03:00
// ==
// eq reports whether `s` and `a` have the same length and the same bytes.
// Panics when `s.str` is nil.
fn (s string) eq(a string) bool {
	if isnil(s.str) {
		panic('string.eq(): nil string')
	}
	if s.len == a.len {
		for k := 0; k < s.len; k++ {
			if s[k] != a[k] {
				return false
			}
		}
		return true
	}
	return false
}
// !=
// ne is the negation of `eq`.
fn (s string) ne(a string) bool {
	if s.eq(a) {
		return false
	}
	return true
}
2019-06-23 21:25:50 +03:00
// s < a
// lt compares byte by byte; the first differing byte decides.
// When one string is a prefix of the other, the shorter one is the smaller.
fn (s string) lt(a string) bool {
	for k := 0; k < s.len; k++ {
		// `a` ran out first: `a` is a proper prefix of `s`, so `s` is not smaller
		if k >= a.len {
			return false
		}
		if s[k] != a[k] {
			return s[k] < a[k]
		}
	}
	// All of `s` matched the start of `a`
	return s.len < a.len
}
// s <= a
// le is true when `s` is smaller than or equal to `a`.
fn (s string) le(a string) bool {
	if s.lt(a) {
		return true
	}
	return s.eq(a)
}
// s > a
// gt is the negation of `le`.
fn (s string) gt(a string) bool {
	if s.le(a) {
		return false
	}
	return true
}
// s >= a
// ge is the negation of `lt`.
fn (s string) ge(a string) bool {
	if s.lt(a) {
		return false
	}
	return true
}
// TODO `fn (s string) + (a string)` ? To be consistent with operator overloading syntax.
// add returns a newly allocated string holding the bytes of `s` followed by the bytes of `a`.
fn (s string) add(a string) string {
	total := a.len + s.len
	mut joined := string {
		str: malloc(total + 1)
		len: total
	}
	for k := 0; k < total; k++ {
		if k < s.len {
			joined[k] = s[k]
		}
		else {
			joined[k] = a[k - s.len]
		}
	}
	// V strings are not null terminated, but just in case
	joined[total] = `\0`
	return joined
}
// split cuts `s` at every occurrence of `delim` and returns the pieces.
// - An empty `delim` yields a single-element array containing `s`.
// - A one-byte `delim` is delegated to `split_single`.
// - For longer delimiters each piece is passed through `trim_space`, and
//   zero-length pieces between delimiters are dropped.
// Delimiters are matched left to right without overlapping, and matching never
// reads past the end of `s` (a trailing partial delimiter is kept as ordinary text).
pub fn (s string) split(delim string) []string {
	mut res := []string
	if delim.len == 0 {
		res << s
		return res
	}
	if delim.len == 1 {
		return s.split_single(delim[0])
	}
	mut i := 0
	// Start of the piece currently being collected
	mut start := 0
	// Only positions where a whole delimiter still fits can match
	for i + delim.len <= s.len {
		mut matched := true
		for j := 0; j < delim.len; j++ {
			if s[i + j] != delim[j] {
				matched = false
				break
			}
		}
		if matched {
			if i > start {
				res << s.substr(start, i).trim_space()
			}
			// Jump over the whole delimiter so its bytes cannot start another match
			i += delim.len
			start = i
		}
		else {
			i++
		}
	}
	// Whatever follows the last delimiter
	if start < s.len {
		res << s.substr(start, s.len).trim_space()
	}
	return res
}
// split_single cuts `s` at every byte equal to `delim` and returns the non-empty pieces.
// A zero `delim` yields a single-element array containing `s`.
pub fn (s string) split_single(delim byte) []string {
	mut parts := []string
	if int(delim) == 0 {
		parts << s
		return parts
	}
	mut begin := 0
	for pos := 0; pos < s.len; pos++ {
		hit := s[pos] == delim
		at_end := pos == s.len - 1
		if hit || at_end {
			// The final byte belongs to the piece unless it is itself a delimiter
			mut stop := pos
			if !hit {
				stop = pos + 1
			}
			piece := s.substr(begin, stop)
			if piece.len > 0 {
				parts << piece
			}
			begin = stop + 1
		}
	}
	return parts
}
// split_into_lines cuts `s` at every `\n` (byte 10) and returns the lines
// without their newline. Empty lines are kept. An empty string yields an empty array.
// A newline at the very end terminates the last line; it does not become part of it.
pub fn (s string) split_into_lines() []string {
	mut res := []string
	if s.len == 0 {
		return res
	}
	mut start := 0
	for i := 0; i < s.len; i++ {
		is_newline := int(s[i]) == 10
		is_last := i == s.len - 1
		if is_newline || is_last {
			// Include the final byte only when it is real content, not a newline
			mut end := i
			if is_last && !is_newline {
				end = i + 1
			}
			res << s.substr(start, end)
			start = i + 1
		}
	}
	return res
}
// 'hello'.left(2) => 'he'
// left returns the first `n` bytes of `s`, or `s` itself when `n` is at least `s.len`.
pub fn (s string) left(n int) string {
	if n < s.len {
		return s.substr(0, n)
	}
	return s
}
2019-08-26 13:32:53 +03:00
// 'hello'.right(2) => 'llo'
// right returns everything from byte `n` to the end, or '' when `n` is at least `s.len`.
pub fn (s string) right(n int) string {
	if n < s.len {
		return s.substr(n, s.len)
	}
	return ''
}
2019-08-17 22:19:37 +03:00
// substr returns a newly allocated copy of the bytes in [start, end).
// Panics unless 0 <= start <= end <= s.len.
// (Sharing the parent's memory via `s.str + start` instead of copying was tried and is disabled.)
pub fn (s string) substr(start, end int) string {
	if start < 0 || end < 0 || start > end || start > s.len || end > s.len {
		panic('substr($start, $end) out of bounds (len=$s.len)')
	}
	n := end - start
	mut piece := string {
		str: malloc(n + 1)
		len: n
	}
	mut k := 0
	for k < n {
		piece.str[k] = s.str[start + k]
		k++
	}
	piece.str[n] = `\0`
	return piece
}
// KMP search
// index returns the byte offset of the first occurrence of `p` in `s`, or -1 when
// there is none. An empty `p` is never found (-1): the search below compares `p[0]`
// and would otherwise index past the end of the pattern.
pub fn (s string) index(p string) int {
	if p.len > s.len || p.len == 0 {
		return -1
	}
	// prefix[i] = length of the longest proper prefix of p[0..i] that is also its suffix
	mut prefix := [0; p.len]
	mut j := 0
	for i := 1; i < p.len; i++ {
		for p[j] != p[i] && j > 0 {
			j = prefix[j - 1]
		}
		if p[j] == p[i] {
			j++
		}
		prefix[i] = j
	}
	// Scan `s`; `j` is how many bytes of `p` are currently matched
	j = 0
	for i := 0; i < s.len; i++ {
		// On mismatch fall back to the longest shorter partial match
		for p[j] != s[i] && j > 0 {
			j = prefix[j - 1]
		}
		if p[j] == s[i] {
			j++
		}
		if j == p.len {
			return i - p.len + 1
		}
	}
	return -1
}
2019-08-01 16:01:03 +03:00
// index_any returns the smallest byte offset in `s` that holds any of the bytes
// in `chars`, or -1 when none of them occurs (including when `chars` is empty).
pub fn (s string) index_any(chars string) int {
	// Walk `s` from the left so the first hit is the earliest position,
	// regardless of the order of the bytes inside `chars`.
	for i := 0; i < s.len; i++ {
		for j := 0; j < chars.len; j++ {
			if s.str[i] == chars.str[j] {
				return i
			}
		}
	}
	return -1
}
2019-06-22 21:20:28 +03:00
// last_index returns the byte offset of the last occurrence of `p` in `s`, or -1.
// Candidate positions are tried from the right end towards the start.
pub fn (s string) last_index(p string) int {
	if p.len > s.len {
		return -1
	}
	for pos := s.len - p.len; pos >= 0; pos-- {
		// Count how many leading bytes of `p` match at this position
		mut same := 0
		for same < p.len && s[pos + same] == p[same] {
			same++
		}
		if same == p.len {
			return pos
		}
	}
	return -1
}
// index_after returns the byte offset of the first occurrence of `p` in `s` at or
// after `start`, or -1 when there is none. A negative `start` is treated as 0;
// a `start` at or beyond `s.len` yields -1.
pub fn (s string) index_after(p string, start int) int {
	if p.len > s.len {
		return -1
	}
	mut strt := start
	if start < 0 {
		strt = 0
	}
	if start >= s.len {
		return -1
	}
	mut i := strt
	// Only try positions where all of `p` still fits, so `s[i + j]` stays in range
	for i + p.len <= s.len {
		mut j := 0
		for j < p.len && s[i + j] == p[j] {
			j++
		}
		if j == p.len {
			return i
		}
		i++
	}
	return -1
}
2019-08-03 00:30:22 +03:00
// count returns the number of non-overlapping occurrences of `substr` in `s`.
// The result is 0 when either string is empty or `substr` is longer than `s`.
pub fn (s string) count(substr string) int {
	if s.len == 0 || substr.len == 0 || substr.len > s.len {
		return 0
	}
	mut total := 0
	mut pos := s.index_after(substr, 0)
	for pos != -1 {
		total++
		// Resume right behind the match that was just counted
		pos = s.index_after(substr, pos + substr.len)
	}
	return total
}
2019-06-22 21:20:28 +03:00
// contains reports whether `index` finds `p` somewhere in `s`.
pub fn (s string) contains(p string) bool {
	return s.index(p) != -1
}
// starts_with reports whether the first `p.len` bytes of `s` equal `p`.
// An empty `p` is a prefix of every string.
// Only the prefix is compared; the rest of `s` is never scanned.
pub fn (s string) starts_with(p string) bool {
	if p.len > s.len {
		return false
	}
	for i := 0; i < p.len; i++ {
		if s.str[i] != p.str[i] {
			return false
		}
	}
	return true
}
// ends_with reports whether the last `p.len` bytes of `s` equal `p`.
// An empty `p` is a suffix of every string.
// Only the suffix is compared; the rest of `s` is never scanned.
pub fn (s string) ends_with(p string) bool {
	if p.len > s.len {
		return false
	}
	// Offset in `s` at which the suffix of length p.len begins
	off := s.len - p.len
	for i := 0; i < p.len; i++ {
		if s.str[off + i] != p.str[i] {
			return false
		}
	}
	return true
}
// TODO only works with ASCII
// to_lower returns a newly allocated copy of `s` with every byte passed through C.tolower.
pub fn (s string) to_lower() string {
	mut b := malloc(s.len + 1)
	for i := 0; i < s.len; i++ {
		b[i] = C.tolower(s.str[i])
	}
	// Terminate the buffer like the other constructors in this file do,
	// so `str` can be handed to C functions.
	b[s.len] = `\0`
	return tos(b, s.len)
}
// to_upper returns a newly allocated copy of `s` with every byte passed through C.toupper.
// Like `to_lower`, this only handles ASCII letters.
pub fn (s string) to_upper() string {
	mut b := malloc(s.len + 1)
	for i := 0; i < s.len; i++ {
		b[i] = C.toupper(s.str[i])
	}
	// Terminate the buffer like the other constructors in this file do,
	// so `str` can be handed to C functions.
	b[s.len] = `\0`
	return tos(b, s.len)
}
2019-08-26 13:32:53 +03:00
// capitalize returns `s` with its first byte upper-cased and all other bytes lower-cased.
// An empty string yields ''.
pub fn (s string) capitalize() string {
	// There is no first byte to index in an empty string
	if s.len == 0 {
		return ''
	}
	sl := s.to_lower()
	cap := sl[0].str().to_upper() + sl.right(1)
	return cap
}
// title splits `s` at spaces, capitalizes every word, and joins them with single spaces.
pub fn (s string) title() string {
	mut capped := []string
	for w in s.split(' ') {
		capped << w.capitalize()
	}
	return capped.join(' ')
}
2019-06-22 21:20:28 +03:00
// 'hey [man] how you doin'
// find_between('[', ']') == 'man'
// Returns the text between the first `start` and the first `end` that follows it.
// Returns '' when `start` is missing, and everything after `start` when `end` is missing.
pub fn (s string) find_between(start, end string) string {
	from := s.index(start)
	if from == -1 {
		return ''
	}
	// Everything to the right of `start`
	tail := s.right(from + start.len)
	to := tail.index(end)
	if to != -1 {
		return tail.left(to)
	}
	return tail
}
// TODO generic
// contains reports whether any element of `ar` equals `val`.
pub fn (ar []string) contains(val string) bool {
	mut found := false
	for item in ar {
		if item == val {
			found = true
			break
		}
	}
	return found
}
// TODO generic
// contains reports whether any element of `ar` equals `val`.
pub fn (ar []int) contains(val int) bool {
	mut found := false
	for item in ar {
		if item == val {
			found = true
			break
		}
	}
	return found
}
2019-08-17 22:19:37 +03:00
/*
2019-06-30 14:06:46 +03:00
pub fn (a []string) to_c() voidptr {
2019-08-17 22:19:37 +03:00
mut res := malloc(sizeof(byteptr) * a.len)
2019-06-22 21:20:28 +03:00
for i := 0; i < a.len; i++ {
val := a[i]
2019-08-17 22:19:37 +03:00
res[i] = val.str
2019-06-22 21:20:28 +03:00
}
2019-08-17 22:19:37 +03:00
return res
2019-06-22 21:20:28 +03:00
}
2019-08-17 22:19:37 +03:00
*/
2019-06-22 21:20:28 +03:00
// is_space reports whether `c` is a space, newline, tab, vertical tab, form feed or carriage return.
fn is_space(c byte) bool {
	if c == ` ` || c == `\n` || c == `\t` {
		return true
	}
	return c == `\v` || c == `\f` || c == `\r`
}
// is_space is the method form of the free function `is_space`.
pub fn (c byte) is_space() bool {
	res := is_space(c)
	return res
}
// trim_space strips spaces, newlines, tabs, vertical tabs, form feeds and
// carriage returns from both ends of `s`.
pub fn (s string) trim_space() string {
	whitespace := ' \n\t\v\f\r'
	return s.trim(whitespace)
}
// trim strips from both ends of `s` every byte that occurs in `cutset`.
// Returns `s` unchanged when `s` or `cutset` is empty, and '' when every byte is cut.
pub fn (s string) trim(cutset string) string {
	if s.len < 1 || cutset.len < 1 {
		return s
	}
	cs_arr := cutset.bytes()
	// First byte from the left that must be kept
	mut lo := 0
	for lo < s.len && s[lo] in cs_arr {
		lo++
	}
	if lo == s.len {
		return ''
	}
	// Last byte that must be kept; s[lo] is kept, so this stops at `lo` at the latest
	mut hi := s.len - 1
	for hi > lo && s[hi] in cs_arr {
		hi--
	}
	return s.substr(lo, hi + 1)
}
// trim_left strips from the start of `s` every byte that occurs in `cutset`.
// Returns `s` unchanged when `s` or `cutset` is empty, and '' when every byte is cut.
pub fn (s string) trim_left(cutset string) string {
	if s.len < 1 || cutset.len < 1 {
		return s
	}
	cs_arr := cutset.bytes()
	mut pos := 0
	// Stop at s.len: indexing s[s.len] is out of range
	for pos < s.len && s[pos] in cs_arr {
		pos++
	}
	return s.right(pos)
}
// trim_right strips from the end of `s` every byte that occurs in `cutset`.
// Returns `s` unchanged when `s` or `cutset` is empty, and '' when every byte is cut.
pub fn (s string) trim_right(cutset string) string {
	if s.len < 1 || cutset.len < 1 {
		return s
	}
	cs_arr := cutset.bytes()
	mut pos := s.len - 1
	// Stop at 0: indexing s[-1] is out of range
	for pos >= 0 && s[pos] in cs_arr {
		pos--
	}
	return s.left(pos+1)
}
// fn print_cur_thread() {
// //C.printf("tid = %08x \n", pthread_self());
// }
2019-09-01 22:51:16 +03:00
// compare_strings is a three-way comparison: -1 when a < b, 1 when a > b, otherwise 0.
fn compare_strings(a, b &string) int {
	mut order := 0
	if a.lt(b) {
		order = -1
	}
	else if a.gt(b) {
		order = 1
	}
	return order
}
2019-09-01 22:51:16 +03:00
// compare_strings_by_len is a three-way comparison on byte length only:
// -1 when `a` is shorter, 1 when `a` is longer, otherwise 0.
fn compare_strings_by_len(a, b &string) int {
	mut order := 0
	if a.len < b.len {
		order = -1
	}
	else if a.len > b.len {
		order = 1
	}
	return order
}
2019-09-01 22:51:16 +03:00
// compare_lower_strings runs `compare_strings` on the `to_lower` copies of `a` and `b`.
fn compare_lower_strings(a, b &string) int {
	lower_a := a.to_lower()
	lower_b := b.to_lower()
	order := compare_strings(lower_a, lower_b)
	return order
}
// sort sorts the array in place via `sort_with_compare`, ordering elements with `compare_strings`.
pub fn (s mut []string) sort() {
s.sort_with_compare(compare_strings)
}
// sort_ignore_case sorts the array in place via `sort_with_compare`, ordering elements
// with `compare_lower_strings` (which compares the `to_lower` form of each pair).
pub fn (s mut []string) sort_ignore_case() {
2019-06-22 21:20:28 +03:00
s.sort_with_compare(compare_lower_strings)
}
// sort_by_len sorts the array in place via `sort_with_compare`, ordering elements
// with `compare_strings_by_len` (byte length only).
pub fn (s mut []string) sort_by_len() {
2019-06-22 21:20:28 +03:00
s.sort_with_compare(compare_strings_by_len)
}
// ustring builds a rune index for `s`: `runes` receives the byte offset at which
// each rune starts and `len` the number of runes.
pub fn (s string) ustring() ustring {
	mut res := ustring {
		s: s
		// runes will have at least s.len elements, save reallocations
		// TODO use VLA for small strings?
		runes: new_array(0, s.len, sizeof(int))
	}
	mut pos := 0
	for pos < s.len {
		// Width of the rune starting here, taken from its first byte
		width := utf8_char_len(s.str[pos])
		res.runes << pos
		res.len++
		pos += width
	}
	return res
}
// A hack that allows to create ustring without allocations.
// It's called from functions like draw_text() where we know that the string is going to be freed
// right away. Uses global buffer for storing runes []int array.
__global g_ustring_runes []int
// ustring_tmp builds the same rune index as `ustring()`, but stores the rune offsets
// in the shared `g_ustring_runes` buffer instead of a fresh array.
pub fn (s string) ustring_tmp() ustring {
// NOTE(review): nothing in this function assigns g_ustring_runes.len, so this branch
// looks like it runs on every call and allocates each time — confirm.
if g_ustring_runes.len == 0 {
g_ustring_runes = new_array(0, 128, sizeof(int))
}
2019-06-22 21:20:28 +03:00
mut res := ustring {
s: s
}
res.runes = g_ustring_runes
// NOTE(review): the buffer is created with capacity 128 but `len` is forced to s.len
// (bytes, not runes); presumably unsafe for strings longer than 128 bytes — confirm.
res.runes.len = s.len
2019-06-22 21:20:28 +03:00
// `j` is the rune counter, `i` the byte offset of the current rune
mut j := 0
for i := 0; i < s.len; i++ {
char_len := utf8_char_len(s.str[i])
2019-06-22 21:20:28 +03:00
res.runes[j] = i
j++
// Skip the remaining bytes of this rune (the loop header adds the last +1)
i += char_len - 1
res.len++
}
return res
}
2019-08-06 19:13:04 +03:00
// substr returns the runes in [_start, _end) as a string.
// Rune positions are translated to byte offsets through `u.runes`; an `_end` at or
// beyond the rune count means "up to the end of the string".
pub fn (u ustring) substr(_start, _end int) string {
	from := u.runes[_start]
	mut to := u.s.len
	if _end < u.runes.len {
		to = u.runes[_end]
	}
	return u.s.substr(from, to)
}
2019-07-07 22:46:21 +03:00
// left returns the first `pos` runes as a string (`u.substr(0, pos)`).
pub fn (u ustring) left(pos int) string {
2019-06-22 21:20:28 +03:00
return u.substr(0, pos)
}
2019-07-07 22:46:21 +03:00
// right returns the runes from position `pos` to the end as a string (`u.substr(pos, u.len)`).
pub fn (u ustring) right(pos int) string {
2019-06-22 21:20:28 +03:00
return u.substr(pos, u.len)
}
// at returns the byte at position `idx`.
// Panics when `idx` is negative or not smaller than `s.len`.
fn (s string) at(idx int) byte {
	in_range := idx >= 0 && idx < s.len
	if !in_range {
		panic('string index out of range: $idx / $s.len')
	}
	return s.str[idx]
}
// at returns the single rune at position `idx` as a string (`u.substr(idx, idx + 1)`).
pub fn (u ustring) at(idx int) string {
2019-06-22 21:20:28 +03:00
return u.substr(idx, idx + 1)
}
// free releases the `runes` array. The underlying string `u.s` is not freed here.
fn (u ustring) free() {
u.runes.free()
}
// is_digit reports whether `c` is an ASCII decimal digit (`0`..`9`).
pub fn (c byte) is_digit() bool {
	if c < `0` {
		return false
	}
	return c <= `9`
}
2019-07-24 01:06:48 +03:00
// is_hex_digit reports whether `c` is an ASCII hexadecimal digit: `0`..`9`, `a`..`f` or `A`..`F`.
pub fn (c byte) is_hex_digit() bool {
	if c.is_digit() {
		return true
	}
	lower_hex := c >= `a` && c <= `f`
	upper_hex := c >= `A` && c <= `F`
	return lower_hex || upper_hex
}
// is_oct_digit reports whether `c` is an ASCII octal digit (`0`..`7`).
pub fn (c byte) is_oct_digit() bool {
	if c < `0` {
		return false
	}
	return c <= `7`
}
// is_letter reports whether `c` is an ASCII letter (`a`..`z` or `A`..`Z`).
pub fn (c byte) is_letter() bool {
	is_lower := c >= `a` && c <= `z`
	is_upper := c >= `A` && c <= `Z`
	return is_lower || is_upper
}
// free releases the memory that `s.str` points to.
// NOTE(review): `tos` shares the caller's buffer instead of copying, so this presumably
// must only be called on strings that own their data — confirm.
pub fn (s string) free() {
2019-07-25 01:25:21 +03:00
free(s.str)
2019-06-22 21:20:28 +03:00
}
2019-08-17 22:19:37 +03:00
/*
2019-06-30 14:06:46 +03:00
fn (arr []string) free() {
2019-06-22 21:20:28 +03:00
for s in arr {
s.free()
}
C.free(arr.data)
}
2019-08-17 22:19:37 +03:00
*/
2019-06-22 21:20:28 +03:00
// all_before('23:34:45.234', '.') == '23:34:45'
// Returns the part of `s` before the first occurrence of `dot`, or `s` itself when `dot` is not found.
pub fn (s string) all_before(dot string) string {
	cut := s.index(dot)
	if cut != -1 {
		return s.left(cut)
	}
	return s
}
// all_before_last returns the part of `s` before the last occurrence of `dot`,
// or `s` itself when `dot` is not found.
pub fn (s string) all_before_last(dot string) string {
	cut := s.last_index(dot)
	if cut != -1 {
		return s.left(cut)
	}
	return s
}
// all_after returns the part of `s` that follows the LAST occurrence of `dot`
// (it searches with `last_index`), or `s` itself when `dot` is not found.
pub fn (s string) all_after(dot string) string {
	cut := s.last_index(dot)
	if cut != -1 {
		return s.right(cut + dot.len)
	}
	return s
}
// fn (s []string) substr(a, b int) string {
// return join_strings(s.slice_fast(a, b))
// }
2019-06-30 14:06:46 +03:00
// join concatenates the elements of `a`, placing `del` between neighbours.
// An empty array yields ''.
pub fn (a []string) join(del string) string {
	if a.len == 0 {
		return ''
	}
	// Final size: every element plus one delimiter between each pair
	mut total := del.len * (a.len - 1)
	for val in a {
		total += val.len
	}
	// Allocate enough memory
	mut res := ''
	res.len = total
	res.str = malloc(total + 1)
	// Next write position in res.str
	mut w := 0
	for i, val in a {
		// The delimiter goes in front of every element except the first
		if i > 0 {
			for k := 0; k < del.len; k++ {
				res.str[w] = del.str[k]
				w++
			}
		}
		for j := 0; j < val.len; j++ {
			res.str[w] = val.str[j]
			w++
		}
	}
	res.str[total] = `\0`
	return res
}
2019-06-30 14:06:46 +03:00
// join_lines joins the elements with a newline between them (`s.join('\n')`).
pub fn (s []string) join_lines() string {
2019-06-22 21:20:28 +03:00
return s.join('\n')
}
// reverse returns a newly allocated string holding the bytes of `s` in reverse order.
// Bytes, not runes, are reversed.
pub fn (s string) reverse() string {
	mut res := string {
		len: s.len
		// One extra byte for the NUL terminator, as in the other constructors in this file
		str: malloc(s.len + 1)
	}
	for i := s.len - 1; i >= 0; i-- {
		res[s.len-i-1] = s[i]
	}
	res[s.len] = `\0`
	return res
}
2019-06-22 21:20:28 +03:00
// 'hello'.limit(2) => 'he'
// 'hi'.limit(10) => 'hi'
// limit returns at most the first `max` runes of `s`; `s` itself when it is not longer than that.
pub fn (s string) limit(max int) string {
	u := s.ustring()
	if u.len > max {
		return u.substr(0, max)
	}
	return s
}
// TODO is_white_space()
// is_white reports whether `c` is a newline (10), space (32), tab (9) or carriage return (13).
pub fn (c byte) is_white() bool {
	return c in [`\n`, ` `, `\t`, `\r`]
}
// hash folds the bytes of `s` into an int with `h = h * 31 + byte`, starting from 0.
// The empty string hashes to 0.
pub fn (s string) hash() int {
	//mut h := s.hash_cache
	mut acc := 0
	for c in s {
		acc = acc * 31 + int(c)
	}
	return acc
}
2019-07-15 18:49:01 +03:00
// bytes returns the bytes of `s` as a new []byte, copied with C.memcpy.
// An empty string yields an empty array.
pub fn (s string) bytes() []byte {
	if s.len > 0 {
		mut copied := [byte(0); s.len]
		C.memcpy(copied.data, s.str, s.len)
		return copied
	}
	return []byte
}