From 82fc5fe79fb094e48be81ebb6ccd3e781efbe8c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kr=C3=BCger?= <45282134+UweKrueger@users.noreply.github.com> Date: Fri, 16 Jul 2021 20:40:51 +0200 Subject: [PATCH] docs: document stack/heap considerations and `[heap]` attribute (#10830) --- doc/docs.md | 247 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 247 insertions(+) diff --git a/doc/docs.md b/doc/docs.md index c393102d33..96165d42d4 100644 --- a/doc/docs.md +++ b/doc/docs.md @@ -116,6 +116,7 @@ For more details and troubleshooting, please visit the [vab GitHub repository](h * [Decoding JSON](#decoding-json) * [Testing](#testing) * [Memory management](#memory-management) + * [Stack and Heap](#stack-and-heap) * [ORM](#orm) @@ -3662,6 +3663,251 @@ fn test() []int { } ``` +### Stack and Heap +#### Stack and Heap Basics + +Like with most other programming languages there are two locations where data can +be stored: + +* The *stack* allows fast allocations with almost zero administrative overhead. The + stack grows and shrinks with the function call depth – so every called + function has its stack segment that remains valid until the function returns. + No freeing is necessary, however, this also means that a reference to a stack + object becomes invalid on function return. Furthermore stack space is + limited (typically to a few Megabytes per thread). +* The *heap* is a large memory area (typically some Gigabytes) that is administrated + by the operating system. Heap objects are allocated and freed by special function + calls that delegate the administrative tasks to the OS. This means that they can + remain valid across several function calls, however, the administration is + expensive. + +#### V's default approach + +Due to performance considerations V tries to put objects on the stack if possible +but allocates them on the heap when obviously necessary. Example: + +```v +struct RefStruct { + r &MyStruct +} + +struct MyStruct { + n int +} + +fn main() { + q, w := f() + println('q: $q.r.n, w: $w.n') +} + +fn f() (RefStruct, &MyStruct) { + a := MyStruct{ + n: 1 + } + b := MyStruct{ + n: 2 + } + c := MyStruct{ + n: 3 + } + e := RefStruct{ + r: &b + } + x := a.n + c.n + println('x: $x') + return e, &c +} +``` + +Here `a` is stored on the stack since it's address never leaves the function `f()`. +However a reference to `b` is part of `e` which is returned. Also a reference to +`c` is returned. For this reason `b` and `c` will be heap allocated. + +Things become less obvious when a reference to an object is passed as function argument: + +```v +struct MyStruct { +mut: + n int +} + +fn main() { + mut q := MyStruct{ + n: 7 + } + w := MyStruct{ + n: 13 + } + x := q.f(&w) // references of `q` and `w` are passed + println('q: $q\nx: $x') +} + +fn (mut a MyStruct) f(b &MyStruct) int { + a.n += b.n + x := a.n * b.n + return x +} +``` +Here the call `q.f(&w)` passes references to `q` and `w` because `a` is +`mut` and `b` is of type `&MyStruct` in `f()`'s declaration, so technically +these references are leaving `main()`. However the *lifetime* of these +references lies inside the scop of `main()` so `q` and `w` are allocated +on the stack. + +#### Manual Control for Stack and Heap + +In the last example the V compiler could put `q` and `w` on the stack +because it assumed that in the call `q.f(&w)` these references were only +used for reading and modifying the referred values – and not to pass the +references itself somewhere else. This can be seen in a way that the +references to `q` and `w` are only *borrowed* to `f()`. + +Things become different if `f()` is doing something with the references itself: + +```v +struct RefStruct { +mut: + r &MyStruct +} + +// see discussion below +[heap] +struct MyStruct { + n int +} + +fn main() { + m := MyStruct{} + mut r := RefStruct{ + r: &m + } + r.g() + println('r: $r') +} + +fn (mut r RefStruct) g() { + s := MyStruct{ + n: 7 + } + r.f(&s) // reference to `s` inside `r` is passed back to `main() ` +} + +fn (mut r RefStruct) f(s &MyStruct) { + r.r = s // would trigger error without `[heap]` +} +``` + +Here `f()` looks quite innocent but is doing nasty things – it inserts a +reference to `s` into `r`. The problem with this is that `s` lives only as long +as `g()` is running but `r` is used in `main()` after that. For this reason +the compiler would complain about the assignment in `f()` because `s` *"might +refer to an object stored on stack"*. The assumption made in `g()` that the call +`r.f(&s)` would only borrow the reference to `s` is wrong. + +A solution to this dilemma is the `[heap]` attribute at the declaration of +`struct MyStruct`. It instructs the compiler to *always* allocate `MyStruct`-objects +on the heap. This way the references to `s` remains valid even after `g()` returns. +The compiler takes into consideration that `MyStruct` objects are always heap +allocated when checking `f()` and allows assigning the reference to `s` to the +`r.r` field. + +There is a pattern often seen in other programming languages: + +```v failcompile +fn (mut a MyStruct) f() &MyStruct { + // do something with a + return &a // would return address of borrowed object +} +``` + +Here `f()` is passed a reference `a` that is passed back to the caller and returned +at the same time. The intention behind such a declaration is method chaining like +`y = x.f().g()`. However, the problem with this approach is that a second reference +to `a` is created – so it is not only borrowed and `MyStruct` has to be +declared as `[heap]`. + +In V the better approach is: + +```v +struct MyStruct { +mut: + n int +} + +fn (mut a MyStruct) f() { + // do something with `a` +} + +fn (mut a MyStruct) g() { + // do something else with `a` +} + +fn main() { + x := MyStruct{} // stack allocated + mut y := x + y.f() + y.g() + // instead of `mut y := x.f().g() +} +``` + +This way the `[heap]` attribute can be avoided – resulting in better performance. + +However, stack space is very limited as mentioned above. For this reason the `[heap]` +attribute might be suitable for very large structures even if not required by use cases +like those mentioned above. + +There is an alternative way to manually control allocation on a case to case basis. This +approach is not recommended but shown here for the sake of completeness: + +```v +struct RefStruct { +mut: + r &MyStruct +} + +struct MyStruct { + n int +} + +fn (mut r RefStruct) f(s &MyStruct) { + r.r = unsafe { s } // override compiler check +} + +fn (mut r RefStruct) g() { + s := &MyStruct{ // `s` explicitly referes to a heap object + n: 7 + } + r.f(s) +} + +fn use_stack() { + x := 7.5 + y := 3.25 + z := x + y + println('$x $y $z') +} + +fn main() { + m := MyStruct{} + mut r := RefStruct{ + r: &m + } + r.g() + use_stack() // to erase invalid stack contents + println('r: $r') +} +``` + +Here the compiler check is suppressed by the `unsafe` block. To make `s` be heap +allocated even without `[heap]` attribute the `struct` literal is prefixed with +an ampersand: `&MyStruct{...}`. + +This last step would not be required by the compiler but without it the reference +inside `r` becomes invalid (the memory area pointed to will be overwritten by +`use_stack()`) and the program might crash (or at least produce an unpredictable +final output). That's why this approach is *unsafe* and should be avoided! + ## ORM (This is still in an alpha state) @@ -4826,6 +5072,7 @@ fn forever() { // The following struct must be allocated on the heap. Therefore, it can only be used as a // reference (`&Window`) or inside another reference (`&OuterStruct{ Window{...} }`). +// See section "Stack and Heap" [heap] struct Window { }