From a39c26507c45dac41f074b54c5a61a6273caebf0 Mon Sep 17 00:00:00 2001
From: Petr Makhnev <51853996+i582@users.noreply.github.com>
Date: Fri, 19 May 2023 17:18:23 +0400
Subject: [PATCH] builtin: speed up string concatenation and `repeat()` method
 with vmemcpy instead of `for` loop for copying data (#18206)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These changes barely speed up programs compiled with the `-prod` flag,
since modern C compilers can perform this optimization on their own, but in
normal mode the performance gain ranges from 1.6 (concatenation) to 1.8 (repeat) times.

Concatenation:
Old (`for` loop):
Time (mean):          3.699 s +- 0.071 s  [User: 3.629 s, System: 0.069 s]
Range (min ... max):  3.548 s ... 3.741 s  10 runs

New (vmemcpy):
Time (mean):          2.305 s +- 0.065 s  [User: 2.263 s, System: 0.041 s]
Range (min ... max):  2.172 s ... 2.355 s  10 runs

'vmemcpy version' ran 1.60 +- 0.05 times faster than 'for loop version'
---
 vlib/builtin/string.v | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/vlib/builtin/string.v b/vlib/builtin/string.v
index 75543bff68..7c4450313f 100644
--- a/vlib/builtin/string.v
+++ b/vlib/builtin/string.v
@@ -725,15 +725,9 @@ fn (s string) + (a string) string {
 		str: unsafe { malloc_noscan(new_len + 1) }
 		len: new_len
 	}
-	for j in 0 .. s.len {
-		unsafe {
-			res.str[j] = s.str[j]
-		}
-	}
-	for j in 0 .. a.len {
-		unsafe {
-			res.str[s.len + j] = a.str[j]
-		}
+	unsafe {
+		vmemcpy(res.str, s.str, s.len)
+		vmemcpy(res.str + s.len, a.str, a.len)
 	}
 	unsafe {
 		res.str[new_len] = 0 // V strings are not null terminated, but just in case
@@ -2077,10 +2071,8 @@ pub fn (s string) repeat(count int) string {
 	}
 	mut ret := unsafe { malloc_noscan(s.len * count + 1) }
 	for i in 0 .. count {
-		for j in 0 .. s.len {
-			unsafe {
-				ret[i * s.len + j] = s[j]
-			}
+		unsafe {
+			vmemcpy(ret + i * s.len, s.str, s.len)
 		}
 	}
 	new_len := s.len * count