From 49382f1f43789e731dc66dc20465e4972f354480 Mon Sep 17 00:00:00 2001 From: spaceface Date: Wed, 11 May 2022 23:05:14 +0200 Subject: [PATCH] gen: make the closure implementation more performant (#14352) --- doc/docs.md | 2 - vlib/v/gen/c/cheaders.v | 113 +++++++++------------------------------- vlib/v/gen/c/fn.v | 87 ++++--------------------------- 3 files changed, 35 insertions(+), 167 deletions(-) diff --git a/doc/docs.md b/doc/docs.md index a9a1480737..627e0ca89d 100644 --- a/doc/docs.md +++ b/doc/docs.md @@ -2422,8 +2422,6 @@ V supports closures too. This means that anonymous functions can inherit variables from the scope they were created in. They must do so explicitly by listing all variables that are inherited. -> Warning: currently works on x64 and arm64 architectures only. - ```v oksyntax my_int := 1 my_closure := fn [my_int] () { diff --git a/vlib/v/gen/c/cheaders.v b/vlib/v/gen/c/cheaders.v index 19b9f2c81c..ce6e5af222 100644 --- a/vlib/v/gen/c/cheaders.v +++ b/vlib/v/gen/c/cheaders.v @@ -69,94 +69,51 @@ fn c_closure_helpers(pref &pref.Preferences) string { builder.write_string(' #ifdef _MSC_VER - #define __RETURN_ADDRESS() _ReturnAddress() + #define __RETURN_ADDRESS() ((char*)_ReturnAddress()) #elif defined(__TINYC__) && defined(_WIN32) - #define __RETURN_ADDRESS() __builtin_return_address(0) + #define __RETURN_ADDRESS() ((char*)__builtin_return_address(0)) #else - #define __RETURN_ADDRESS() __builtin_extract_return_addr(__builtin_return_address(0)) + #define __RETURN_ADDRESS() ((char*)__builtin_extract_return_addr(__builtin_return_address(0))) #endif #ifdef __V_amd64 -#ifdef _WIN32 static const char __closure_thunk[] = { - 0x48, 0x89, 0x0d, 0xc1, 0xff, 0xff, 0xff, // mov qword ptr [rip - 63], rcx # <_orig_rcx> - 0x8f, 0x05, 0xc3, 0xff, 0xff, 0xff, // pop qword ptr [rip - 61] # <_orig_rbp> - 0xff, 0x15, 0xd5, 0xff, 0xff, 0xff, // call qword ptr [rip - 43] # - 0x48, 0x8b, 0x0d, 0xae, 0xff, 0xff, 0xff, // mov rcx, qword ptr [rip - 82] # <_orig_rcx> - 0xff, 0x15, 0xc0, 0xff, 0xff, 0xff, // call qword ptr [rip - 64] # - 0xff, 0x35, 0xaa, 0xff, 0xff, 0xff, // push qword ptr [rip - 86] # <_orig_rbp> - 0xc3 // ret + 0x8f, 0x05, 0xda, 0xff, 0xff, 0xff, // pop QWORD PTR [rip - 0x26] # <_orig_rbp> + 0xff, 0x15, 0xe4, 0xff, 0xff, 0xff, // call QWORD PTR [rip - 0x1C] # + 0xff, 0x25, 0xce, 0xff, 0xff, 0xff, // jmp QWORD PTR [rip - 0x32] # }; -#else -static const char __closure_thunk[] = { - 0x48, 0x89, 0x3d, 0xc1, 0xff, 0xff, 0xff, // mov qword ptr [rip - 63], rdi # <_orig_rdi> - 0x8f, 0x05, 0xc3, 0xff, 0xff, 0xff, // pop qword ptr [rip - 61] # <_orig_rbp> - 0xff, 0x15, 0xd5, 0xff, 0xff, 0xff, // call qword ptr [rip - 43] # - 0x48, 0x8b, 0x3d, 0xae, 0xff, 0xff, 0xff, // mov rdi, qword ptr [rip - 82] # <_orig_rdi> - 0xff, 0x15, 0xc0, 0xff, 0xff, 0xff, // call qword ptr [rip - 64] # - 0xff, 0x35, 0xaa, 0xff, 0xff, 0xff, // push qword ptr [rip - 86] # <_orig_rbp> - 0xc3 // ret -}; -#endif -#define __CLOSURE_WRAPPER_OFFSET 19 -#define __CLOSURE_UNWRAPPER_OFFSET 32 -#define __CLOSURE_WRAPPER_EXTRA_PARAM void* _t -#define __CLOSURE_WRAPPER_EXTRA_PARAM_COMMA , +#define __CLOSURE_DATA_OFFSET 20 #elif defined(__V_x86) static char __closure_thunk[] = { - 0xe8, 0x00, 0x00, 0x00, 0x00, // call 4 - 0x58, // pop eax - 0x8f, 0x40, 0xe3, // pop dword ptr [eax - 29] # <_orig_rbp> - 0xff, 0x50, 0xef, // call dword ptr [eax - 17] # - 0xe8, 0x00, 0x00, 0x00, 0x00, // call 4 - 0x58, // pop eax - 0xff, 0x50, 0xdf, // call dword ptr [eax - 33] # - 0xe8, 0x00, 0x00, 0x00, 0x00, // call 4 - 0x58, // pop eax - 0xff, 0x70, 0xce, // push dword ptr [eax - 50] # <_orig_rbp> - 0xc3 // ret + 0xe8, 0x00, 0x00, 0x00, 0x00, // call 4 + 0x59, // pop ecx + 0x8f, 0x41, 0xeb, // pop DWORD PTR [ecx - 21] # <_orig_rbp> + 0xff, 0x51, 0xf3, // call DWORD PTR [ecx - 13] # + 0xe8, 0x00, 0x00, 0x00, 0x00, // call 4 + 0x59, // pop ecx + 0xff, 0x61, 0xdf, // jmp DWORD PTR [ecx - 33] # <_orig_rbp> }; -#define __CLOSURE_WRAPPER_OFFSET 12 -#define __CLOSURE_UNWRAPPER_OFFSET 21 -#define __CLOSURE_WRAPPER_EXTRA_PARAM void* _t -#define __CLOSURE_WRAPPER_EXTRA_PARAM_COMMA , - +#define __CLOSURE_DATA_OFFSET 16 #elif defined(__V_arm64) static char __closure_thunk[] = { 0x10, 0x00, 0x00, 0x10, // adr x16, start - 0x08, 0x82, 0x1c, 0xf8, // str x8, _orig_x8 - 0x1e, 0x02, 0x1d, 0xf8, // str x30, _orig_x30 - 0xf0, 0xfe, 0xff, 0x58, // ldr x16, wrapper + 0x1e, 0x02, 0x1e, 0xf8, // str x30, _orig_x30 + 0x50, 0xff, 0xff, 0x58, // ldr x16, fn 0x00, 0x02, 0x3f, 0xd6, // blr x16 - 0x70, 0xff, 0xff, 0x10, // adr x16, start - 0x08, 0x82, 0x5c, 0xf8, // ldr x8, _orig_x8 - 0x30, 0xfe, 0xff, 0x58, // ldr x16, unwrapper - 0x00, 0x02, 0x3f, 0xd6, // blr x16 - 0xf0, 0xfe, 0xff, 0x10, // adr x16, start - 0x1e, 0x02, 0x5d, 0xf8, // ldr x30, _orig_x30 + 0x9e, 0xfe, 0xff, 0x58, // ldr x30, _orig_x30 0xc0, 0x03, 0x5f, 0xd6 // ret }; -#define __CLOSURE_WRAPPER_OFFSET 20 -#define __CLOSURE_UNWRAPPER_OFFSET 36 -#define __CLOSURE_WRAPPER_EXTRA_PARAM -#define __CLOSURE_WRAPPER_EXTRA_PARAM_COMMA +#define __CLOSURE_DATA_OFFSET 24 #elif defined(__V_arm32) static char __closure_thunk[] = { - 0x24, 0x00, 0x0f, 0xe5, // str r0, orig_r0 - 0x24, 0xe0, 0x0f, 0xe5, // str lr, orig_lr - 0x1c, 0xc0, 0x1f, 0xe5, // ldr ip, wrapper - 0x3c, 0xff, 0x2f, 0xe1, // blx ip - 0x34, 0x00, 0x1f, 0xe5, // ldr r0, orig_r0 - 0x2c, 0xc0, 0x1f, 0xe5, // ldr ip, unwrapper - 0x3c, 0xff, 0x2f, 0xe1, // blx ip - 0x3c, 0xe0, 0x1f, 0xe5, // ldr lr, orig_lr - 0x1e, 0xff, 0x2f, 0xe1 // bx lr + 0x18, 0xe0, 0x0f, 0xe5, // str lr, orig_lr + 0x14, 0xc0, 0x1f, 0xe5, // ldr ip, fn + 0x3c, 0xff, 0x2f, 0xe1, // blx ip + 0x24, 0xe0, 0x1f, 0xe5, // ldr lr, orig_lr + 0x1e, 0xff, 0x2f, 0xe1 // bx lr }; -#define __CLOSURE_WRAPPER_OFFSET 16 -#define __CLOSURE_UNWRAPPER_OFFSET 28 -#define __CLOSURE_WRAPPER_EXTRA_PARAM void* _t -#define __CLOSURE_WRAPPER_EXTRA_PARAM_COMMA , +#define __CLOSURE_DATA_OFFSET 16 #endif static int _V_PAGE_SIZE = 4096; // pre-initialized to the most common value, in case _vinit is not called (in a DLL, for example) @@ -171,22 +128,7 @@ static inline void __closure_set_function(void* closure, void* f) { p[-2] = f; } -static inline void __closure_set_wrapper(void* closure, void* f) { - void** p = closure; - p[-3] = f; -} - -static inline void __closure_set_unwrapper(void* closure, void* f) { - void** p = closure; - p[-4] = f; -} - -static inline void __closure_set_base_ptr(void* closure, void* bp) { - void** p = closure; - p[-5] = bp; -} - -static void* __closure_create(void* fn, void* wrapper, void* unwrapper, void* data) { +static void* __closure_create(void* fn, void* data) { #ifdef _WIN32 SYSTEM_INFO si; GetNativeSystemInfo(&si); @@ -213,9 +155,6 @@ static void* __closure_create(void* fn, void* wrapper, void* unwrapper, void* da __closure_set_data(closure, data); __closure_set_function(closure, fn); - __closure_set_wrapper(closure, wrapper); - __closure_set_unwrapper(closure, unwrapper); - __closure_set_base_ptr(closure, p); return closure; } ') diff --git a/vlib/v/gen/c/fn.v b/vlib/v/gen/c/fn.v index 60a7820fae..07b87de831 100644 --- a/vlib/v/gen/c/fn.v +++ b/vlib/v/gen/c/fn.v @@ -195,7 +195,7 @@ fn (mut g Gen) gen_fn_decl(node &ast.FnDecl, skip bool) { is_closure := node.scope.has_inherited_vars() mut cur_closure_ctx := '' if is_closure { - cur_closure_ctx, _ = closure_ctx(node) + cur_closure_ctx = closure_ctx(node) // declare the struct before its implementation g.definitions.write_string(cur_closure_ctx) g.definitions.writeln(';') @@ -288,15 +288,6 @@ fn (mut g Gen) gen_fn_decl(node &ast.FnDecl, skip bool) { arg_start_pos := g.out.len fargs, fargtypes, heap_promoted := g.fn_decl_params(node.params, node.scope, node.is_variadic) if is_closure { - mut s := '$cur_closure_ctx *$c.closure_ctx' - if node.params.len > 0 { - s = ', ' + s - } else { - // remove generated `void` - g.out.cut_to(arg_start_pos) - } - g.definitions.write_string(s) - g.write(s) g.nr_closures++ } arg_str := g.out.after(arg_start_pos) @@ -312,6 +303,9 @@ fn (mut g Gen) gen_fn_decl(node &ast.FnDecl, skip bool) { } g.definitions.writeln(');') g.writeln(') {') + if is_closure { + g.writeln('$cur_closure_ctx* $c.closure_ctx = *(void**)(__RETURN_ADDRESS() - __CLOSURE_DATA_OFFSET);') + } for i, is_promoted in heap_promoted { if is_promoted { g.writeln('${fargtypes[i]}* ${fargs[i]} = HEAP(${fargtypes[i]}, _v_toheap_${fargs[i]});') @@ -472,8 +466,8 @@ fn (mut g Gen) c_fn_name(node &ast.FnDecl) ?string { const closure_ctx = '_V_closure_ctx' -fn closure_ctx(node ast.FnDecl) (string, string) { - return 'struct _V_${node.name}_Ctx', 'struct _V_${node.name}_Args' +fn closure_ctx(node ast.FnDecl) string { + return 'struct _V_${node.name}_Ctx' } fn (mut g Gen) gen_anon_fn(mut node ast.AnonFn) { @@ -482,73 +476,17 @@ fn (mut g Gen) gen_anon_fn(mut node ast.AnonFn) { g.write(node.decl.name) return } - ctx_struct, arg_struct := closure_ctx(node.decl) + ctx_struct := closure_ctx(node.decl) // it may be possible to optimize `memdup` out if the closure never leaves current scope // TODO in case of an assignment, this should only call "__closure_set_data" and "__closure_set_function" (and free the former data) - g.write('__closure_create($node.decl.name, ${node.decl.name}_wrapper, ${node.decl.name}_unwrapper, ($ctx_struct*) memdup(&($ctx_struct){') + g.write('__closure_create($node.decl.name, ($ctx_struct*) memdup(&($ctx_struct){') g.indent++ for var in node.inherited_vars { g.writeln('.$var.name = $var.name,') } g.indent-- - ps := g.table.pointer_size - is_big_cutoff := if g.pref.os == .windows || g.pref.arch == .arm32 { ps } else { ps * 2 } - rt_size, _ := g.table.type_size(node.decl.return_type) - is_big := rt_size > is_big_cutoff g.write('}, sizeof($ctx_struct)))') - mut sb := strings.new_builder(512) - ret_styp := g.typ(node.decl.return_type) - - sb.write_string(' VV_LOCAL_SYMBOL void ${node.decl.name}_wrapper(') - if is_big { - sb.write_string('__CLOSURE_WRAPPER_EXTRA_PARAM ') - if node.decl.params.len > 0 { - sb.write_string('__CLOSURE_WRAPPER_EXTRA_PARAM_COMMA ') - } - } - for i, param in node.decl.params { - if i > 0 { - sb.write_string(', ') - } - sb.write_string('${g.typ(param.typ)} a${i + 1}') - } - sb.writeln(') {') - if node.decl.params.len > 0 { - sb.writeln('void** closure_start = (void**)((char*)__RETURN_ADDRESS() - __CLOSURE_WRAPPER_OFFSET); - $arg_struct* args = closure_start[-5];') - for i in 0 .. node.decl.params.len { - sb.writeln('\targs->a${i + 1} = a${i + 1};') - } - } - - sb.writeln('}\n') - - sb.writeln(' VV_LOCAL_SYMBOL $ret_styp ${node.decl.name}_unwrapper(void) { - void** closure_start = (void**)((char*)__RETURN_ADDRESS() - __CLOSURE_UNWRAPPER_OFFSET); - void* userdata = closure_start[-1];') - sb.write_string('\t${g.typ(node.decl.return_type)} (*fn)(') - for i, param in node.decl.params { - sb.write_string('${g.typ(param.typ)} a${i + 1}, ') - } - sb.writeln('void* userdata) = closure_start[-2];') - - if node.decl.params.len > 0 { - sb.writeln('\t$arg_struct* args = closure_start[-5];') - } - - if node.decl.return_type == ast.void_type_idx { - sb.write_string('\tfn(') - } else { - sb.write_string('\treturn fn(') - } - for i in 0 .. node.decl.params.len { - sb.write_string('args->a${i + 1}, ') - } - sb.writeln('userdata); -}') - - g.anon_fn_definitions << sb.str() g.empty_line = false } @@ -559,20 +497,13 @@ fn (mut g Gen) gen_anon_fn_decl(mut node ast.AnonFn) { node.has_gen = true mut builder := strings.new_builder(256) if node.inherited_vars.len > 0 { - ctx_struct, arg_struct := closure_ctx(node.decl) + ctx_struct := closure_ctx(node.decl) builder.writeln('$ctx_struct {') for var in node.inherited_vars { styp := g.typ(var.typ) builder.writeln('\t$styp $var.name;') } builder.writeln('};\n') - if node.decl.params.len > 0 { - builder.writeln('$arg_struct {') - for i, param in node.decl.params { - builder.writeln('\t${g.typ(param.typ)} a${i + 1};') - } - builder.writeln('};\n') - } } pos := g.out.len was_anon_fn := g.anon_fn