2023-03-28 23:55:57 +03:00
|
|
|
// Copyright (c) 2019-2023 Alexander Medvednikov. All rights reserved.
|
2019-06-23 05:21:30 +03:00
|
|
|
// Use of this source code is governed by an MIT license
|
|
|
|
// that can be found in the LICENSE file.
|
2019-09-14 23:48:30 +03:00
|
|
|
module strings
|
2019-06-22 21:20:28 +03:00
|
|
|
|
2021-01-18 15:20:06 +03:00
|
|
|
// strings.Builder is a dynamically growing byte buffer, used to efficiently
// assemble a large string out of many smaller pieces. Appending to a builder,
// and converting the result to a string once at the end, is much better for
// performance/memory usage, than repeatedly concatenating strings.
pub type Builder = []u8
|
2019-06-22 21:20:28 +03:00
|
|
|
|
2020-12-21 23:00:32 +03:00
|
|
|
// new_builder returns a new, empty string builder, whose buffer has an
// initial capacity of `initial_size` bytes.
pub fn new_builder(initial_size int) Builder {
	mut sb := Builder([]u8{cap: initial_size})
	// Set the .noslices flag on the fresh buffer; `ensure_cap` checks that
	// flag, before it frees the old memory block of a builder.
	unsafe { sb.flags.set(.noslices) }
	return sb
}
|
|
|
|
|
2022-09-25 22:54:46 +03:00
|
|
|
// reuse_as_plain_u8_array returns the builder's buffer as a plain []u8 value.
// It is useful, when you have accumulated data in the builder, that you want
// to pass/access as []u8 later, without copying or freeing the buffer.
// Note: you should *NOT* use the string builder instance after calling this
// method. Use only the returned value.
[unsafe]
pub fn (mut b Builder) reuse_as_plain_u8_array() []u8 {
	// Clear the .noslices flag, that `new_builder` has set on the buffer,
	// before handing the buffer out as an ordinary array.
	unsafe { b.flags.clear(.noslices) }
	return *b
}
|
|
|
|
|
2021-03-20 10:02:28 +03:00
|
|
|
// write_ptr appends `len` bytes, read from the address `ptr`, to the
// accumulated buffer. It does nothing, when `len` is 0.
[unsafe]
pub fn (mut b Builder) write_ptr(ptr &u8, len int) {
	if len != 0 {
		unsafe { b.push_many(ptr, len) }
	}
}
|
|
|
|
|
2021-08-19 07:14:20 +03:00
|
|
|
// write_rune appends the single rune `r` to the accumulated buffer.
// The rune is first converted by `utf32_to_str_no_malloc` into a fixed size
// local array; nothing is appended, when that conversion yields no bytes.
[manualfree]
pub fn (mut b Builder) write_rune(r rune) {
	mut scratch := [5]u8{}
	encoded := unsafe { utf32_to_str_no_malloc(u32(r), &scratch[0]) }
	if encoded.len != 0 {
		unsafe { b.push_many(encoded.str, encoded.len) }
	}
}
|
|
|
|
|
|
|
|
// write_runes appends all of the given `runes`, in order, to the
// accumulated buffer. Runes, whose conversion yields no bytes, are skipped.
pub fn (mut b Builder) write_runes(runes []rune) {
	// One scratch array is shared by all the iterations of the loop.
	mut scratch := [5]u8{}
	for r in runes {
		encoded := unsafe { utf32_to_str_no_malloc(u32(r), &scratch[0]) }
		if encoded.len != 0 {
			unsafe { b.push_many(encoded.str, encoded.len) }
		}
	}
}
|
|
|
|
|
2022-05-08 07:15:42 +03:00
|
|
|
// clear discards the buffer contents, by replacing the buffer with a new
// empty one, that has the same capacity as the old buffer.
pub fn (mut b Builder) clear() {
	kept_cap := b.cap
	b = []u8{cap: kept_cap}
}
|
|
|
|
|
2022-05-07 20:18:42 +03:00
|
|
|
// write_u8 appends the single byte `data` to the end of the accumulated buffer.
pub fn (mut b Builder) write_u8(data u8) {
	b << data
}
|
|
|
|
|
2022-04-15 20:45:28 +03:00
|
|
|
// write_byte appends the single byte `data` to the end of the accumulated buffer.
// It does the same as `write_u8`, but takes a `byte` parameter.
pub fn (mut b Builder) write_byte(data byte) {
	b << data
}
|
|
|
|
|
2021-02-23 10:42:48 +03:00
|
|
|
// write appends all the bytes of `data` to the accumulated buffer, and
// returns the number of the appended bytes, which is always `data.len`.
// It implements the Writer interface.
pub fn (mut b Builder) write(data []u8) !int {
	written := data.len
	if written != 0 {
		b << data
	}
	return written
}
|
|
|
|
|
2021-12-05 12:59:18 +03:00
|
|
|
// drain_builder appends all of the content of the `other` builder to `b`,
// then frees `other`, and re-initialises it with a capacity of
// `other_new_cap` bytes, so that `other` is ready to receive new content.
[manualfree]
pub fn (mut b Builder) drain_builder(mut other Builder, other_new_cap int) {
	has_content := other.len > 0
	if has_content {
		b << *other
	}
	// The old buffer of `other` is released explicitly, before `other` is
	// replaced by a brand new builder.
	unsafe { other.free() }
	other = new_builder(other_new_cap)
}
|
|
|
|
|
|
|
|
// byte_at returns the byte, that is located at the index `n` of the buffer.
// Note: it can panic, if there are not enough bytes in the strings builder yet.
[inline]
pub fn (b &Builder) byte_at(n int) u8 {
	// The builder is viewed as a plain []u8, and then indexed.
	return unsafe { (&[]u8(b))[n] }
}
|
|
|
|
|
2020-12-21 23:00:32 +03:00
|
|
|
// write_string appends the string `s` to the accumulated buffer.
// It does nothing, when `s` is empty.
[inline]
pub fn (mut b Builder) write_string(s string) {
	if s.len != 0 {
		unsafe { b.push_many(s.str, s.len) }
	}
}
|
|
|
|
|
2020-12-21 23:00:32 +03:00
|
|
|
// go_back discards the last `n` bytes from the buffer.
// When `n` is greater than the current buffer length, the whole buffer is
// discarded. When `n` is 0 or negative, the buffer is left unchanged.
pub fn (mut b Builder) go_back(n int) {
	if n <= 0 {
		return
	}
	// Guard against `b.len - n` becoming negative, since that value would
	// otherwise be passed to `trim` as the new length of the buffer.
	if n >= b.len {
		b.trim(0)
		return
	}
	b.trim(b.len - n)
}
|
2020-03-11 01:21:26 +03:00
|
|
|
|
2022-03-25 01:07:15 +03:00
|
|
|
// spart returns a new string, that holds a copy of the `n` bytes of the
// buffer, starting at the index `start_pos`. The copy is 0 terminated.
// Note: it does no bounds checking; its callers have to make sure, that
// the requested range is inside the buffer.
[inline]
fn (b &Builder) spart(start_pos int, n int) string {
	unsafe {
		src := &u8(b.data) + start_pos
		// 1 extra byte is allocated for the terminating 0.
		mut dst := malloc_noscan(n + 1)
		vmemcpy(dst, src, n)
		dst[n] = 0
		return tos(dst, n)
	}
}
|
|
|
|
|
2020-12-21 23:00:32 +03:00
|
|
|
// cut_last cuts the last `n` bytes from the buffer, and returns them as a string.
// It returns an empty string, and leaves the buffer unchanged, when `n` is
// not positive, or when `n` is greater than the current buffer length.
pub fn (mut b Builder) cut_last(n int) string {
	// A larger `n` would make `cut_pos` negative, so `spart` would copy from
	// before the start of the buffer; a negative `n` would be passed to
	// `spart` as a negative byte count.
	if n <= 0 || n > b.len {
		return ''
	}
	cut_pos := b.len - n
	res := b.spart(cut_pos, n)
	b.trim(cut_pos)
	return res
}
|
|
|
|
|
2021-05-07 19:41:27 +03:00
|
|
|
// cut_to cuts the part of the buffer, that starts at `pos`, and returns it
// as a string. If `pos` is negative, or greater than the builder length,
// it returns an empty string, and does nothing else.
pub fn (mut b Builder) cut_to(pos int) string {
	// A negative `pos` would ask `cut_last` for more bytes, than the buffer has.
	if pos < 0 || pos > b.len {
		return ''
	}
	return b.cut_last(b.len - pos)
}
|
2021-05-07 19:41:27 +03:00
|
|
|
|
2020-12-21 23:00:32 +03:00
|
|
|
// go_back_to resets the buffer to the given position `pos`, discarding
// the bytes after it.
// Note: `pos` should be less than the existing buffer length.
// A negative `pos` is ignored, and leaves the buffer unchanged.
pub fn (mut b Builder) go_back_to(pos int) {
	// A negative position is never valid, so do not pass it on to `trim`
	// as the new length of the buffer.
	if pos < 0 {
		return
	}
	b.trim(pos)
}
|
|
|
|
|
2020-12-21 23:00:32 +03:00
|
|
|
// writeln appends the string `s` to the accumulated buffer, followed by
// a newline character. For an empty `s`, only the newline is appended.
[inline]
pub fn (mut b Builder) writeln(s string) {
	has_text := s.len > 0
	if has_text {
		unsafe { b.push_many(s.str, s.len) }
	}
	b << u8(`\n`)
}
|
|
|
|
|
2020-03-21 09:01:06 +03:00
|
|
|
// last_n returns a copy of the last `n` bytes of the buffer, as a string.
// The buffer itself is not modified.
// It returns an empty string, when `n` is not positive, or when `n` is
// greater than the current buffer length.
// Example: for a buffer containing 'hello world', last_n(5) returns 'world'.
pub fn (b &Builder) last_n(n int) string {
	// A negative `n` would be passed to `spart` as a negative byte count.
	if n <= 0 || n > b.len {
		return ''
	}
	return b.spart(b.len - n, n)
}
|
|
|
|
|
|
|
|
// after returns a copy of the part of the buffer, that starts at the index
// `n`, as a string. The buffer itself is not modified.
// It returns an empty string, when `n` is negative, or when `n` is greater
// than or equal to the current buffer length.
// Example: for a buffer containing 'hello world', after(6) returns 'world'.
pub fn (b &Builder) after(n int) string {
	// A negative `n` would make `spart` copy from before the start of the buffer.
	if n < 0 || n >= b.len {
		return ''
	}
	return b.spart(n, b.len - n)
}
|
|
|
|
|
2021-02-04 22:45:35 +03:00
|
|
|
// str returns a string, holding a copy of all of the accumulated buffer content.
// Note: after a call to b.str(), the builder b will be empty, and could be used again.
// The returned string *owns* its own separate copy of the data, that was
// accumulated in the string builder, before the .str() call.
pub fn (mut b Builder) str() string {
	// Append a 0 byte first, so that the duplicated data is 0 terminated too.
	b << u8(0)
	total := b.len
	copied := unsafe { &u8(memdup_noscan(b.data, total)) }
	// The terminating 0 is not counted in the length of the result.
	res := unsafe { copied.vstring_with_len(total - 1) }
	b.trim(0)
	return res
}
|
|
|
|
|
2022-03-18 11:36:53 +03:00
|
|
|
// ensure_cap makes sure, that the buffer has a capacity of at least `n` bytes.
// When the current capacity is smaller, a new zeroed memory block is
// allocated, and the existing content is copied into it. Otherwise, it
// does nothing.
pub fn (mut b Builder) ensure_cap(n int) {
	// code adapted from vlib/builtin/array.v
	if b.cap >= n {
		return
	}
	grown := vcalloc(n * b.element_size)
	if b.data != unsafe { nil } {
		unsafe { vmemcpy(grown, b.data, b.len * b.element_size) }
		// TODO: the old data may be leaked when no GC is used (ref-counting?)
		// The old block is freed here, only if the .noslices flag is set.
		if b.flags.has(.noslices) {
			unsafe { free(b.data) }
		}
	}
	unsafe {
		b.data = grown
		b.offset = 0
		b.cap = n
	}
}
|
|
|
|
|
2022-06-02 10:41:27 +03:00
|
|
|
// free releases the memory block, that is used for the buffer, and resets
// the data pointer of the builder to nil. It does nothing, when the data
// pointer is already 0.
// Note: do not use the builder, after a call to free().
[unsafe]
pub fn (mut b Builder) free() {
	if b.data == 0 {
		return
	}
	unsafe {
		free(b.data)
		b.data = nil
	}
}
|