diff --git a/vlib/v/gen/native/amd64.v b/vlib/v/gen/native/amd64.v index 1bc168cd68..aa787d1aeb 100644 --- a/vlib/v/gen/native/amd64.v +++ b/vlib/v/gen/native/amd64.v @@ -7,6 +7,7 @@ pub struct Amd64 { mut: g &Gen // arm64 specific stuff for code generation + is_16bit_aligned bool } // The registers are ordered for faster generation @@ -210,9 +211,14 @@ fn (mut g Gen) cmp_var_reg(var Var, reg Register, config VarConfig) { // TODO: size g.write8(0x48) // 83 for 1 byte? g.write8(0x39) - g.write8(0x45) offset := var.offset - config.offset - g.write8(0xff - offset + 1) + is_far_var := offset > 0x80 || offset < -0x7f + g.write8(if is_far_var { 0x85 } else { 0x45 }) + if is_far_var { + g.write32(int((0xffffffff - i64(offset) + 1) % 0x100000000)) + } else { + g.write8((0xff - offset + 1) % 0x100) + } g.println('cmp var `$var.name`, $reg') } GlobalVar { @@ -241,9 +247,14 @@ fn (mut g Gen) cmp_var(var Var, val int, config VarConfig) { LocalVar { // TODO: size g.write8(0x81) // 83 for 1 byte? - g.write8(0x7d) offset := var.offset - config.offset - g.write8(0xff - offset + 1) + is_far_var := offset > 0x80 || offset < -0x7f + g.write8(if is_far_var { 0xbd } else { 0x7d }) + if is_far_var { + g.write32(int((0xffffffff - i64(offset) + 1) % 0x100000000)) + } else { + g.write8((0xff - offset + 1) % 0x100) + } g.write32(val) g.println('cmp var `$var.name` $val') } @@ -273,9 +284,15 @@ fn (mut g Gen) dec_var(var Var, config VarConfig) { } LocalVar { // TODO: size - g.write16(0x6d81) // 83 for 1 byte + g.write8(0x81) // 83 for 1 byte offset := var.offset - config.offset - g.write8(0xff - offset + 1) + is_far_var := offset > 0x80 || offset < -0x7f + g.write8(if is_far_var { 0xad } else { 0x6d }) + if is_far_var { + g.write32(int((0xffffffff - i64(offset) + 1) % 0x100000000)) + } else { + g.write8((0xff - offset + 1) % 0x100) + } g.write32(1) g.println('dec_var `$var.name`') } @@ -305,9 +322,15 @@ fn (mut g Gen) inc_var(var Var, config VarConfig) { } LocalVar { // TODO: size - g.write16(0x4581) // 83 for 1 byte + g.write8(0x81) // 83 for 1 byte offset := var.offset - config.offset - g.write8(0xff - offset + 1) + is_far_var := offset > 0x80 || offset < -0x7f + g.write8(if is_far_var { 0x85 } else { 0x45 }) + if is_far_var { + g.write32(int((0xffffffff - i64(offset) + 1) % 0x100000000)) + } else { + g.write8((0xff - offset + 1) % 0x100) + } g.write32(1) g.println('inc_var `$var.name`') } @@ -429,7 +452,7 @@ fn (mut g Gen) mov64(reg Register, val i64) { g.write8(0x48) g.write8(0xc7) g.write8(0xc2) - g.write32(i32(int(val))) + g.write32(int(val)) g.println('mov32 $reg, $val') return } @@ -499,40 +522,58 @@ fn (mut g Gen) mov_reg_to_var(var Var, reg Register, config VarConfig) { } LocalVar { offset := var.offset - config.offset + is_far_var := offset > 0x80 || offset < -0x7f typ := if config.typ == 0 { var.typ } else { config.typ } - size := match typ { + mut size := 'UNKNOWN' + is_extended_register := int(reg) >= int(Register.r8) && int(reg) <= int(Register.r15) + match typ { ast.i64_type_idx, ast.u64_type_idx, ast.isize_type_idx, ast.usize_type_idx, ast.int_literal_type_idx { - g.write16(0x8948) - 'QWORD' + g.write16(0x8948 + if is_extended_register { 4 } else { 0 }) + size = 'QWORD' } ast.int_type_idx, ast.u32_type_idx, ast.rune_type_idx { + if is_extended_register { + g.write8(0x44) + } g.write8(0x89) - 'DWORD' + size = 'DWORD' } ast.i16_type_idx, ast.u16_type_idx { - g.write16(0x8966) - 'WORD' + g.write8(0x66) + if is_extended_register { + g.write8(0x44) + } + g.write8(0x89) + size = 'WORD' } ast.i8_type_idx, ast.u8_type_idx, ast.char_type_idx, ast.bool_type_idx { + if is_extended_register { + g.write8(0x44) + } g.write8(0x88) - 'BYTE' + size = 'BYTE' } else { g.n_error('unsupported type for mov_reg_to_var') 'UNKNOWN' } } + far_var_offset := if is_far_var { 0x40 } else { 0 } match reg { - .eax, .rax { g.write8(0x45) } - .edi, .rdi { g.write8(0x7d) } - .rsi { g.write8(0x75) } - .rdx { g.write8(0x55) } - .rcx { g.write8(0x4d) } + .eax, .rax, .r8 { g.write8(0x45 + far_var_offset) } + .edi, .rdi { g.write8(0x7d + far_var_offset) } + .rsi { g.write8(0x75 + far_var_offset) } + .rdx { g.write8(0x55 + far_var_offset) } + .rcx, .r9 { g.write8(0x4d + far_var_offset) } else { g.n_error('mov_from_reg $reg') } } - g.write8(0xff - offset + 1) + if is_far_var { + g.write32(int((0xffffffff - i64(offset) + 1) % 0x100000000)) + } else { + g.write8((0xff - offset + 1) % 0x100) + } g.println('mov $size PTR [rbp-$offset.hex2()],$reg') } GlobalVar { @@ -560,13 +601,17 @@ fn (mut g Gen) mov_int_to_var(var Var, integer int, config VarConfig) { LocalVar { offset := var.offset - config.offset typ := if config.typ == 0 { var.typ } else { config.typ } - var_addr := 0xff - offset + 1 + is_far_var := offset > 0x80 || offset < -0x7f match typ { ast.i8_type_idx, ast.u8_type_idx, ast.char_type_idx { g.write8(0xc6) - g.write8(0x45) - g.write8(var_addr) + g.write8(if is_far_var { 0x85 } else { 0x45 }) + if is_far_var { + g.write32(int((0xffffffff - i64(offset) + 1) % 0x100000000)) + } else { + g.write8((0xff - offset + 1) % 0x100) + } g.write8(u8(integer)) g.println('mov BYTE PTR[rbp-$offset.hex2()], $integer') } @@ -574,8 +619,12 @@ fn (mut g Gen) mov_int_to_var(var Var, integer int, config VarConfig) { ast.int_literal_type_idx { g.write8(0x48) g.write8(0xc7) - g.write8(0x45) - g.write8(var_addr) + g.write8(if is_far_var { 0x85 } else { 0x45 }) + if is_far_var { + g.write32(int((0xffffffff - i64(offset) + 1) % 0x100000000)) + } else { + g.write8((0xff - offset + 1) % 0x100) + } g.write32(integer) g.println('mov QWORD PTR[rbp-$offset.hex2()], $integer') } @@ -591,6 +640,7 @@ fn (mut g Gen) mov_int_to_var(var Var, integer int, config VarConfig) { } fn (mut g Gen) lea_var_to_reg(reg Register, var_offset int) { + is_far_var := var_offset > 0x80 || var_offset < -0x7f match reg { .rax, .rbx, .rsi, .rdi { g.write8(0x48) @@ -598,16 +648,21 @@ fn (mut g Gen) lea_var_to_reg(reg Register, var_offset int) { else {} } g.write8(0x8d) + far_var_offset := if is_far_var { 0x40 } else { 0 } match reg { - .eax, .rax { g.write8(0x45) } - .edi, .rdi { g.write8(0x7d) } - .rsi { g.write8(0x75) } - .rdx { g.write8(0x55) } - .rbx { g.write8(0x5d) } - .rcx { g.write8(0x4d) } + .eax, .rax { g.write8(0x45 + far_var_offset) } + .edi, .rdi { g.write8(0x7d + far_var_offset) } + .rsi { g.write8(0x75 + far_var_offset) } + .rdx { g.write8(0x55 + far_var_offset) } + .rbx { g.write8(0x5d + far_var_offset) } + .rcx { g.write8(0x4d + far_var_offset) } else { g.n_error('lea_var_to_reg $reg') } } - g.write8(0xff - var_offset + 1) + if is_far_var { + g.write32(int((0xffffffff - i64(var_offset) + 1) % 0x100000000)) + } else { + g.write8((0xff - var_offset + 1) % 0x100) + } g.println('lea $reg, [rbp-$var_offset.hex2()]') } @@ -629,6 +684,7 @@ fn (mut g Gen) mov_var_to_reg(reg Register, var Var, config VarConfig) { } LocalVar { offset := var.offset - config.offset + is_far_var := offset > 0x80 || offset < -0x7f typ := if config.typ == 0 { var.typ } else { config.typ } instruction, size := match typ { @@ -673,16 +729,21 @@ fn (mut g Gen) mov_var_to_reg(reg Register, var Var, config VarConfig) { 'mov', 'UNKNOWN' } } + far_var_offset := if is_far_var { 0x40 } else { 0 } match reg { - .eax, .rax { g.write8(0x45) } - .edi, .rdi { g.write8(0x7d) } - .rsi { g.write8(0x75) } - .rdx { g.write8(0x55) } - .rbx { g.write8(0x5d) } - .rcx { g.write8(0x4d) } + .eax, .rax { g.write8(0x45 + far_var_offset) } + .edi, .rdi { g.write8(0x7d + far_var_offset) } + .rsi { g.write8(0x75 + far_var_offset) } + .rdx { g.write8(0x55 + far_var_offset) } + .rbx { g.write8(0x5d + far_var_offset) } + .rcx { g.write8(0x4d + far_var_offset) } else { g.n_error('mov_var_to_reg $reg') } } - g.write8(0xff - offset + 1) + if is_far_var { + g.write32(int((0xffffffff - i64(offset) + 1) % 0x100000000)) + } else { + g.write8((0xff - offset + 1) % 0x100) + } g.println('$instruction $reg, $size PTR [rbp-$offset.hex2()]') } GlobalVar { @@ -746,18 +807,20 @@ pub fn (mut g Gen) push(reg Register) { g.write8(0x41) g.write8(0x50 + int(reg) - 8) } - /* - match reg { - .rbp { g.write8(0x55) } - else {} + if mut g.code_gen is Amd64 { + g.code_gen.is_16bit_aligned = !g.code_gen.is_16bit_aligned } - */ g.println('push $reg') } pub fn (mut g Gen) pop(reg Register) { - g.write8(0x58 + int(reg)) - // TODO r8... + if int(reg) >= int(Register.r8) && int(reg) <= int(Register.r15) { + g.write8(0x41) + } + g.write8(0x58 + int(reg) % 8) + if mut g.code_gen is Amd64 { + g.code_gen.is_16bit_aligned = !g.code_gen.is_16bit_aligned + } g.println('pop $reg') } @@ -1444,31 +1507,21 @@ pub fn (mut g Gen) call_fn_amd64(node ast.CallExpr) { n = 'main.$n' } addr := g.fn_addr[n] - // Copy values to registers (calling convention) - // g.mov(.eax, 0) - for i in 0 .. node.args.len { - expr := node.args[i].expr - match expr { - ast.IntegerLiteral { - // `foo(2)` => `mov edi,0x2` - g.mov(native.fn_arg_registers[i], expr.val.int()) - } - ast.Ident { - // `foo(x)` => `mov edi,DWORD PTR [rbp-0x8]` - var_offset := g.get_var_offset(expr.name) - if g.pref.is_verbose { - println('i=$i fn name= $name offset=$var_offset') - println(int(native.fn_arg_registers[i])) - } - g.mov_var_to_reg(native.fn_arg_registers[i], expr as ast.Ident) - } - else { - g.v_error('unhandled call_fn (name=$name) node: $expr.type_name()', node.pos) - } - } + // not aligned now XOR pushed args will be odd + is_16bit_aligned := if mut g.code_gen is Amd64 { g.code_gen.is_16bit_aligned } else { true } != ( + node.args.len > 6 && node.args.len % 2 == 1) + if !is_16bit_aligned { + // dummy data + g.push(.rbp) } - if node.args.len > 6 { - g.v_error('more than 6 args not allowed for now', node.pos) + for i_ in 0 .. node.args.len { + i := node.args.len - i_ - 1 + g.expr(node.args[i].expr) + g.push(.rax) + } + num_on_register := if node.args.len > 6 { 6 } else { node.args.len } + for i in 0 .. num_on_register { + g.pop(native.fn_arg_registers[i]) } if addr == 0 { g.delay_fn_call(name) @@ -1477,6 +1530,14 @@ pub fn (mut g Gen) call_fn_amd64(node ast.CallExpr) { g.call(int(addr)) } g.println('call `${name}()`') + if !is_16bit_aligned { + // dummy data + g.pop(.rdi) + } + for _ in 0 .. node.args.len - 6 { + // args + g.pop(.rdi) + } } fn (mut g Gen) call_builtin_amd64(name string) i64 { @@ -2309,7 +2370,13 @@ fn (mut g Gen) fn_decl_amd64(node ast.FnDecl) { // `mov DWORD PTR [rbp-0x4],edi` offset += 4 // TODO size - g.mov_reg_to_var(LocalVar{offset, ast.int_type_idx, name}, native.fn_arg_registers[i]) + if i < 6 { + g.mov_reg_to_var(LocalVar{offset, ast.int_type_idx, name}, native.fn_arg_registers[i]) + } else { + // &var = rbp + 16, 24, 32, ... + g.var_offset[name] = (4 - i) * 8 + g.var_alloc_size[name] = 4 + } } // define defer vars for i in 0 .. node.defer_stmts.len { @@ -2318,6 +2385,10 @@ fn (mut g Gen) fn_decl_amd64(node ast.FnDecl) { } // g.stmts(node.stmts) + // 16 bytes align + g.stack_var_pos += 23 + g.stack_var_pos /= 16 + g.stack_var_pos *= 16 g.println('; stack frame size: $g.stack_var_pos') g.write32_at(local_alloc_pos + 3, g.stack_var_pos) is_main := node.name == 'main.main' @@ -2339,6 +2410,10 @@ pub fn (mut g Gen) builtin_decl_amd64(builtin BuiltinFn) { g.sub(.rsp, 0) builtin.body(builtin, mut g) + // 16 bytes align + g.stack_var_pos += 7 + g.stack_var_pos /= 16 + g.stack_var_pos *= 16 g.println('; stack frame size: $g.stack_var_pos') g.write32_at(local_alloc_pos + 3, g.stack_var_pos) @@ -2363,32 +2438,39 @@ pub fn (mut g Gen) allocate_var(name string, size int, initial_val int) int { // TODO return 0 } + padding := (size - g.stack_var_pos % size) % size + n := g.stack_var_pos + size + padding + is_far_var := n > 0x80 || n < -0x7f + far_var_offset := if is_far_var { 0x40 } else { 0 } // `a := 3` => `mov DWORD [rbp-0x4],0x3` match size { 1 { // BYTE g.write8(0xc6) - g.write8(0x45) + g.write8(0x45 + far_var_offset) } 4 { // DWORD g.write8(0xc7) - g.write8(0x45) + g.write8(0x45 + far_var_offset) } 8 { // QWORD g.write8(0x48) g.write8(0xc7) - g.write8(0x45) + g.write8(0x45 + far_var_offset) } else { g.n_error('allocate_var: bad size $size') } } // Generate N in `[rbp-N]` - n := g.stack_var_pos + size - g.write8(0xff - n + 1) - g.stack_var_pos += size + if is_far_var { + g.write32(int((0xffffffff - i64(n) + 1) % 0x100000000)) + } else { + g.write8((0xff - n + 1) % 0x100) + } + g.stack_var_pos += size + padding g.var_offset[name] = g.stack_var_pos g.var_alloc_size[name] = size diff --git a/vlib/v/gen/native/tests/sumcall.vv b/vlib/v/gen/native/tests/sumcall.vv index e1bf951f38..6a5ef6559b 100644 --- a/vlib/v/gen/native/tests/sumcall.vv +++ b/vlib/v/gen/native/tests/sumcall.vv @@ -7,6 +7,10 @@ fn sumcall2(a int, b int) int { return a + b } +fn sumcall7(a int, b int, c int, d int, e int, f int, g int) int { + return a + b + c + d + e + f + g +} + fn main() { r := sumcall (1,2) assert r == 3 @@ -14,5 +18,11 @@ fn main() { s := sumcall2 (1,2) assert s == 3 + a := sumcall7(1,2,s,4,5,6,7) + assert a == 28 + + b := sumcall7(sumcall7(1, 2, 3, 4, 5, 6, 7), 8, 9, sumcall7(10, 11, 12, 13, 14, 15, 16), 17, 18, sumcall7(19, 20, 21, 22, 23, 24, 25)) + assert b == 325 + exit(0) }