1
0
mirror of https://github.com/vlang/v.git synced 2023-08-10 21:13:21 +03:00

transformer: provide direct_memory_access to arrays when safe (#12724)

This commit is contained in:
Thomas Mangin
2021-12-11 19:55:46 +00:00
committed by GitHub
parent fe14e2fceb
commit 0d0d7323bb
7 changed files with 623 additions and 27 deletions

View File

@@ -4,46 +4,352 @@ import v.pref
import v.ast
import v.util
struct KeyVal {
key string
value int
}
[if debug_bounds_checking ?]
fn debug_bounds_checking(str string) {
println(str)
}
// IndexState is used to track the index analysis performed when parsing the code
// `IndexExpr` nodes are annotated with `is_direct`, indicating that the array index can be safely directly accessed.
// The c_gen code check will handle this annotation and perform this direct memory access. The following cases are considered valid for this optimisation:
// 1. the array size is known and has a `len` larger than the index requested
// 2. the array was previously accessed with a higher value which would have reported the issue already
// 3. the array was created from a range expression a := range[10..13] and the offset'ed indexes are safe
// Current limitations:
// * any function using break/continue or goto/label stopped from being optimised as soon as the relevant AST nodes are found as the code can not be ensured to be sequential
// * `enum` and `const` indexes are not optimised (they could probably be looked up)
// * for loops with multiple var in their init and/or inc are not analysed
// * mut array are not analysed as their size can be reduced, but self-assignment in a single line
pub struct IndexState {
mut:
// max_index has the biggest array index accessed for then named array
// so if a[2] was set or read, it will be 2
// A new array with no .len will recorded as -1 (accessing a[0] would be invalid)
// the value -2 is used to indicate that the array should not be analysed
// this is used for a mut array
max_index map[string]int
// We need to snapshot when entering `if` and `for` blocks and restore on exit
// as the statements may not be run. This is managed by indent() & unindent().
saved_disabled []bool
saved_key_vals [][]KeyVal
pub mut:
// on encountering goto/break/continue statements we stop any analysis
// for the current function (as the code is not linear anymore)
disabled bool
level int
}
// we are remembering the last array accessed and checking if the value is safe
// the node is updated with this information which can then be used by the code generators
fn (mut i IndexState) safe_access(key string, new int) bool {
$if no_bounds_checking {
return false
}
if i.disabled {
return false
}
old := i.max_index[key] or {
debug_bounds_checking('$i.level ${key}.len = $new')
i.max_index[key] = new
return false
}
if new > old {
if old < -1 {
debug_bounds_checking('$i.level $key[$new] unsafe (mut array)')
return false
}
debug_bounds_checking('$i.level $key[$new] unsafe (index was $old)')
i.max_index[key] = new
return false
}
debug_bounds_checking('$i.level $key[$new] safe (index is $old)')
return true
}
// safe_offset returns for a previvous array what was the highest
// offset we ever accessed for that identifier
fn (mut i IndexState) safe_offset(key string) int {
$if no_bounds_checking {
return -2
}
if i.disabled {
return -2
}
return i.max_index[key] or { -1 }
}
// indent is used for when encountering new code blocks (if, for and functions)
// The code analysis needs to take into consideration blocks of code which
// may not run at runtime (if/for) and therefore even if a new maximum for an
// index access is found on an if branch it can not be used within the parent
// code. The same is true with for blocks. indent() snapshot the current state,
// to allow restoration with unindent()
// Also within a function, analysis must be `disabled` when goto or break are
// encountered as the code flow is then not lineear, and only restart when a
// new function analysis is started.
[if !no_bounds_checking]
fn (mut i IndexState) indent(is_function bool) {
mut kvs := []KeyVal{cap: i.max_index.len}
for k, v in i.max_index {
kvs << KeyVal{k, v}
}
i.saved_disabled << i.disabled
i.saved_key_vals << kvs
if is_function {
i.disabled = false
}
i.level += 1
}
// restoring the data as it was before the if/for/unsafe block
[if !no_bounds_checking]
fn (mut i IndexState) unindent() {
i.level -= 1
mut keys := []string{cap: i.max_index.len}
for k, _ in i.max_index {
keys << k
}
for k in keys {
i.max_index.delete(k)
}
for saved in i.saved_key_vals.pop() {
i.max_index[saved.key] = saved.value
}
i.disabled = i.saved_disabled.pop()
}
pub struct Transformer {
pref &pref.Preferences
pub mut:
index &IndexState
}
pub fn new_transformer(pref &pref.Preferences) &Transformer {
return &Transformer{
pref: pref
index: &IndexState{
saved_key_vals: [][]KeyVal{cap: 1000}
saved_disabled: []bool{cap: 1000}
}
}
}
pub fn (t Transformer) transform_files(ast_files []&ast.File) {
pub fn (mut t Transformer) transform_files(ast_files []&ast.File) {
for i in 0 .. ast_files.len {
mut file := unsafe { ast_files[i] }
t.transform(mut file)
}
}
pub fn (t Transformer) transform(mut ast_file ast.File) {
pub fn (mut t Transformer) transform(mut ast_file ast.File) {
for mut stmt in ast_file.stmts {
t.stmt(mut stmt)
}
}
pub fn (t Transformer) stmt(mut node ast.Stmt) {
pub fn (mut t Transformer) find_new_array_len(node ast.AssignStmt) {
// looking for, array := []type{len:int}
mut right := node.right[0]
if mut right is ast.ArrayInit {
mut left := node.left[0]
if mut left is ast.Ident {
// we can not analyse mut array
if left.is_mut {
t.index.safe_access(left.name, -2)
return
}
// as we do not need to check any value under the setup len
if !right.has_len {
t.index.safe_access(left.name, -1)
return
}
mut len := int(0)
value := right.len_expr
if value is ast.IntegerLiteral {
len = value.val.int() + 1
}
t.index.safe_access(left.name, len)
}
}
}
pub fn (mut t Transformer) find_new_range(node ast.AssignStmt) {
// looking for, array := []type{len:int}
mut right := node.right[0]
if mut right is ast.IndexExpr {
mut left := node.left[0]
if mut left is ast.Ident {
// we can not analyse mut array
if left.is_mut {
t.index.safe_access(left.name, -2)
return
}
index := right.index
if index is ast.RangeExpr {
range_low := index.low
if range_low is ast.IntegerLiteral {
sub_left := right.left
if sub_left is ast.Ident {
safe := t.index.safe_offset(sub_left.name)
low := range_low.val.int()
if safe >= low {
t.index.safe_access(left.name, safe - low)
}
}
}
}
}
}
}
pub fn (mut t Transformer) find_mut_self_assign(node ast.AssignStmt) {
// even if mutable we can be sure than `a[1] = a[2] is safe
}
pub fn (mut t Transformer) find_assert_len(node ast.InfixExpr) {
right := node.right
match right {
ast.IntegerLiteral {
left := node.left
if left is ast.SelectorExpr {
len := right.val.int()
if left.field_name == 'len' {
match node.op {
.eq { // ==
t.index.safe_access(left.expr.str(), len - 1)
}
.ge { // >=
t.index.safe_access(left.expr.str(), len - 1)
}
.gt { // >
t.index.safe_access(left.expr.str(), len)
}
else {}
}
}
}
}
ast.SelectorExpr {
left := node.left
if left is ast.IntegerLiteral {
len := left.val.int()
if right.field_name == 'len' {
match node.op {
.eq { // ==
t.index.safe_access(right.expr.str(), len - 1)
}
.le { // <=
t.index.safe_access(right.expr.str(), len - 1)
}
.lt { // <
t.index.safe_access(right.expr.str(), len)
}
else {}
}
}
}
}
else {}
}
}
pub fn (mut t Transformer) check_safe_array(mut node ast.IndexExpr) {
if !node.is_array {
return
}
index := node.index
name := node.left
match index {
ast.IntegerLiteral {
node.is_direct = t.index.safe_access(name.str(), index.val.int())
}
ast.RangeExpr {
if index.has_high {
high := index.high
if high is ast.IntegerLiteral {
t.index.safe_access(name.str(), high.val.int())
return
}
}
if index.has_low {
low := index.low
if low is ast.IntegerLiteral {
t.index.safe_access(name.str(), low.val.int())
return
}
}
}
ast.CastExpr {
// do not deal with weird casting
if index.typname != 'int' {
return
}
index_expr := index.expr
if index_expr is ast.IntegerLiteral {
val := index_expr.val
node.is_direct = t.index.safe_access(name.str(), val.int())
}
}
ast.EnumVal {
debug_bounds_checking('? $name[.$index.val] safe?: no-idea (yet)!')
}
ast.Ident {
// we may be able to track const value in simple cases
}
else {}
}
}
pub fn (mut t Transformer) stmt(mut node ast.Stmt) {
match mut node {
ast.EmptyStmt {}
ast.NodeError {}
ast.AsmStmt {}
ast.AssertStmt {}
ast.AssertStmt {
expr := node.expr
match expr {
ast.InfixExpr {
t.find_assert_len(expr)
}
else {
t.expr(expr)
}
}
}
ast.AssignStmt {
t.find_new_array_len(node)
t.find_new_range(node)
t.find_mut_self_assign(node)
for mut right in node.right {
right = t.expr(right)
}
for left in node.left {
t.expr(left)
}
}
ast.Block {
t.index.indent(false)
for mut stmt in node.stmts {
t.stmt(mut stmt)
}
t.index.unindent()
}
ast.BranchStmt {
// break or continue:
// we can not rely on sequential scanning and need to cancel all index optimisation
t.index.disabled = true
}
ast.BranchStmt {}
ast.ComptimeFor {}
ast.ConstDecl {
for mut field in node.fields {
@@ -74,24 +380,76 @@ pub fn (t Transformer) stmt(mut node ast.Stmt) {
}
}
ast.FnDecl {
t.index.indent(true)
for mut stmt in node.stmts {
t.stmt(mut stmt)
}
t.index.unindent()
}
ast.ForCStmt {
// TODO we do not optimise array access for multi init
// for a,b := 0,1; a < 10; a,b = a+b, a {...}
// https://github.com/vlang/v/issues/12782
// if node.has_init && !node.is_multi {
// mut init := node.init
// t.stmt(mut init)
// }
if node.has_cond {
t.expr(node.cond)
}
t.index.indent(false)
for mut stmt in node.stmts {
t.stmt(mut stmt)
}
t.index.unindent()
// https://github.com/vlang/v/issues/12782
// if node.has_inc && !node.is_multi {
// mut inc := node.inc
// t.stmt(mut inc)
// }
}
ast.ForInStmt {
// indexes access within the for itself are not optimised (yet)
t.index.indent(false)
for mut stmt in node.stmts {
t.stmt(mut stmt)
}
t.index.unindent()
}
ast.ForCStmt {}
ast.ForInStmt {}
ast.ForStmt {
cond_expr := t.expr(node.cond)
node = &ast.ForStmt{
...node
cond: t.expr(node.cond)
cond: cond_expr
}
if node.cond is ast.BoolLiteral && !(node.cond as ast.BoolLiteral).val { // for false { ... } should be eleminated
node = &ast.EmptyStmt{}
match node.cond {
ast.BoolLiteral {
if !(node.cond as ast.BoolLiteral).val { // for false { ... } should be eleminated
node = &ast.EmptyStmt{}
}
}
else {
if !node.is_inf {
t.index.indent(false)
for mut stmt in node.stmts {
t.stmt(mut stmt)
}
t.index.unindent()
}
}
}
}
ast.GlobalDecl {}
ast.GotoLabel {}
ast.GotoStmt {}
ast.GotoStmt {
// we can not rely on sequential scanning and need to cancel all index optimisation
t.index.disabled = true
}
ast.HashStmt {}
ast.Import {}
ast.InterfaceDecl {}
@@ -107,16 +465,30 @@ pub fn (t Transformer) stmt(mut node ast.Stmt) {
}
}
pub fn (t Transformer) expr(node ast.Expr) ast.Expr {
pub fn (mut t Transformer) expr(node ast.Expr) ast.Expr {
match mut node {
ast.CallExpr {
for arg in node.args {
t.expr(arg.expr)
}
return node
}
ast.InfixExpr {
return t.infix_expr(node)
}
ast.OrExpr {
for mut stmt in node.stmts {
t.stmt(mut stmt)
}
return node
}
ast.IndexExpr {
return ast.IndexExpr{
t.check_safe_array(mut node)
mut index := ast.IndexExpr{
...node
index: t.expr(node.index)
}
return index
}
ast.IfExpr {
for mut branch in node.branches {
@@ -124,6 +496,7 @@ pub fn (t Transformer) expr(node ast.Expr) ast.Expr {
...(*branch)
cond: t.expr(branch.cond)
}
t.index.indent(false)
for i, mut stmt in branch.stmts {
t.stmt(mut stmt)
@@ -151,7 +524,10 @@ pub fn (t Transformer) expr(node ast.Expr) ast.Expr {
}
}
}
t.index.unindent()
}
// where we place the result of the if when a := if ...
t.expr(node.left)
return node
}
ast.MatchExpr {
@@ -163,6 +539,7 @@ pub fn (t Transformer) expr(node ast.Expr) ast.Expr {
for mut expr in branch.exprs {
expr = t.expr(expr)
}
t.index.indent(false)
for i, mut stmt in branch.stmts {
t.stmt(mut stmt)
@@ -190,24 +567,34 @@ pub fn (t Transformer) expr(node ast.Expr) ast.Expr {
}
}
}
t.index.unindent()
}
return node
}
ast.AnonFn {
return node
}
ast.PostfixExpr {
return node
}
ast.PrefixExpr {
return node
}
ast.Likely {
return node
}
else {
return node
}
}
}
pub fn (t Transformer) if_expr(mut original ast.IfExpr) ast.Expr {
pub fn (mut t Transformer) if_expr(mut original ast.IfExpr) ast.Expr {
mut stop_index, mut unreachable_branches := -1, []int{cap: original.branches.len}
if original.is_comptime {
return *original
}
for i, mut branch in original.branches {
for mut stmt in branch.stmts {
t.stmt(mut stmt)
}
cond := t.expr(branch.cond)
branch = ast.IfBranch{
...(*branch)
@@ -221,6 +608,11 @@ pub fn (t Transformer) if_expr(mut original ast.IfExpr) ast.Expr {
unreachable_branches << i
}
}
t.index.indent(false)
for mut stmt in branch.stmts {
t.stmt(mut stmt)
}
t.index.unindent()
}
if stop_index != -1 {
unreachable_branches = unreachable_branches.filter(it < stop_index)
@@ -243,7 +635,7 @@ pub fn (t Transformer) if_expr(mut original ast.IfExpr) ast.Expr {
return *original
}
pub fn (t Transformer) match_expr(mut original ast.MatchExpr) ast.Expr {
pub fn (mut t Transformer) match_expr(mut original ast.MatchExpr) ast.Expr {
cond, mut terminate := t.expr(original.cond), false
original = ast.MatchExpr{
...(*original)
@@ -251,13 +643,14 @@ pub fn (t Transformer) match_expr(mut original ast.MatchExpr) ast.Expr {
}
for mut branch in original.branches {
if branch.is_else {
t.index.indent(false)
for mut stmt in branch.stmts {
t.stmt(mut stmt)
}
t.index.unindent()
continue
}
for mut stmt in branch.stmts {
t.stmt(mut stmt)
}
for mut expr in branch.exprs {
expr = t.expr(expr)
@@ -314,6 +707,12 @@ pub fn (t Transformer) match_expr(mut original ast.MatchExpr) ast.Expr {
}
}
t.index.indent(false)
for mut stmt in branch.stmts {
t.stmt(mut stmt)
}
t.index.unindent()
if terminate {
break
}
@@ -321,7 +720,7 @@ pub fn (t Transformer) match_expr(mut original ast.MatchExpr) ast.Expr {
return *original
}
pub fn (t Transformer) infix_expr(original ast.InfixExpr) ast.Expr {
pub fn (mut t Transformer) infix_expr(original ast.InfixExpr) ast.Expr {
mut node := original
node.left = t.expr(node.left)
node.right = t.expr(node.right)