From b343f19bec58eae026c297d99e39104091128aed Mon Sep 17 00:00:00 2001 From: Ruofan XU <47302112+SleepyRoy@users.noreply.github.com> Date: Sun, 19 Sep 2021 21:22:28 +0800 Subject: [PATCH] v.scanner: fix ambiguity of two-level generics and shift-right (#11540) --- vlib/builtin/string.v | 7 +++++ vlib/v/fmt/fmt.v | 2 +- vlib/v/scanner/scanner.v | 52 ++++++++++++++++++++++++++++++---- vlib/v/tests/generics_test.v | 54 ++++++++++++++++++++++++++++++++++++ 4 files changed, 108 insertions(+), 7 deletions(-) diff --git a/vlib/builtin/string.v b/vlib/builtin/string.v index ebe3b33ecc..3d4096d95c 100644 --- a/vlib/builtin/string.v +++ b/vlib/builtin/string.v @@ -1274,6 +1274,13 @@ pub fn (c byte) is_letter() bool { return (c >= `a` && c <= `z`) || (c >= `A` && c <= `Z`) } +// is_alnum returns `true` if the byte is in range a-z, A-Z, 0-9 and `false` otherwise. +// Example: assert byte(`V`).is_alnum() == true +[inline] +pub fn (c byte) is_alnum() bool { + return c.is_letter() || c.is_digit() +} + // free allows for manually freeing the memory occupied by the string [manualfree; unsafe] pub fn (s &string) free() { diff --git a/vlib/v/fmt/fmt.v b/vlib/v/fmt/fmt.v index 0814fe3ed1..dee9dc940c 100644 --- a/vlib/v/fmt/fmt.v +++ b/vlib/v/fmt/fmt.v @@ -1534,7 +1534,7 @@ pub fn (mut f Fmt) call_expr(node ast.CallExpr) { f.comments(arg.comments) } if node.is_method { - if node.name in ['map', 'filter'] { + if node.name in ['map', 'filter', 'all', 'any'] { f.inside_lambda = true defer { f.inside_lambda = false diff --git a/vlib/v/scanner/scanner.v b/vlib/v/scanner/scanner.v index e7fe9fd207..52c301ff12 100644 --- a/vlib/v/scanner/scanner.v +++ b/vlib/v/scanner/scanner.v @@ -11,6 +11,7 @@ import v.pref import v.util import v.vet import v.errors +import v.ast const ( single_quote = `'` @@ -36,6 +37,7 @@ pub mut: is_inter_end bool is_enclosed_inter bool line_comment string + last_lt int = -1 // position of latest < // prev_tok TokenKind is_started bool is_print_line_on_error bool @@ -917,12 +919,49 
@@ fn (mut s Scanner) text_scan() token.Token { return s.new_token(.ge, '', 2) } else if nextc == `>` { if s.pos + 2 < s.text.len { - if s.text[s.pos + 2] == `=` { - s.pos += 2 - return s.new_token(.right_shift_assign, '', 3) - } else if s.text[s.pos + 2] in [`(`, `)`, `{`, `>`, `,`] { - // multi-level generics such as Foo>{ }, func>( ), etc - return s.new_token(.gt, '', 1) + // first skip any spaces after `>>`, e.g. `>> (` => `>>(` + mut non_space_pos := s.pos + 2 + for non_space_pos < s.text.len && s.text[non_space_pos].is_space() { + non_space_pos++ + } + match s.text[non_space_pos] { + `=` { + s.pos += 2 + return s.new_token(.right_shift_assign, '', 3) + } + // definite generic cases such as Foo<Bar<int>>{} + `)`, `{`, `}`, `,`, `>`, `[`, `]` { + return s.new_token(.gt, '', 1) + } + // note that two-level generic calls and shift-right share the remaining patterns, + // such as `foo<Baz, Bar<int>>(a)` vs `a, b := Foo{} < Foo{}, bar >> (baz)`, + // which are hard to tell apart; the heuristic below discriminates between them + // (ping @SleepyRoy if you have a smarter algorithm :-) + else { + // almost-correct heuristic: the type list after the last `<` of a 2-level generic call cannot be extremely long; + // the limit is set to 100 here, which should be enough for real cases + if s.last_lt >= 0 && s.pos - s.last_lt < 100 { + // ...Bar<int, []Foo, [20]f64, map[string][]bool>> => + // int, []Foo, [20]f64, map[string][]bool => + // int, Foo, f64, bool + typs := s.text[s.last_lt + 1..s.pos].trim_right('>').split(',').map(it.trim_space().trim_right('>').after(']')) + // if any typ is neither a builtin type name nor a capitalized alphanumeric Type name, then this is not a generic call + for typ in typs { + if typ.len == 0 { + s.pos++ + return s.new_token(.right_shift, '', 2) + } + if typ !in ast.builtin_type_names && !(typ[0].is_capital() + && typ[1..].bytes().all(it.is_alnum())) { + s.pos++ + return s.new_token(.right_shift, '', 2) + } + } + return s.new_token(.gt, '', 1) + } + s.pos++ + return s.new_token(.right_shift, '', 2) + } } } s.pos++ @@ -946,6 +985,7 @@ fn (mut s Scanner) text_scan() token.Token { s.pos++ return s.new_token(.arrow, '', 2) } else { + 
s.last_lt = s.pos return s.new_token(.lt, '', 1) } } diff --git a/vlib/v/tests/generics_test.v b/vlib/v/tests/generics_test.v index 8a6a9fb5ca..0a941bf83e 100644 --- a/vlib/v/tests/generics_test.v +++ b/vlib/v/tests/generics_test.v @@ -510,6 +510,21 @@ fn test_multi_level_generics() { two) == 20 } +struct Empty {} + +fn (e1 Empty) < (e2 Empty) bool { + return true +} + +struct TandU { + t T + u U +} + +fn boring_function(t T) bool { + return true +} + fn test_generic_detection() { v1, v2 := -1, 1 @@ -530,4 +545,43 @@ fn test_generic_detection() { assert multi_generic_args<[]int, int>([]int{}, 0) assert multi_generic_args(map[int]int{}, 0) assert 0 < return_one(10, 0) + + // "the hardest cases" + foo, bar, baz := 1, 2, 16 + res1, res2 := foo < bar, baz >> (foo + 1 - 1) + assert res1 + assert res2 == 8 + res3, res4 := Empty{} < Empty{}, baz >> (foo + 1 - 1) + assert res3 + assert res4 == 8 + assert boring_function>(TandU{ + t: Empty{} + u: 10 + }) + + assert boring_function>>(MultiLevel>{ + foo: MultiLevel{ + foo: 10 + } + }) + + assert boring_function, []int>>(TandU, []int>{ + t: MultiLevel{ + foo: 10 + } + u: [10] + }) + + // this final case challenges your scanner :-) + assert boring_function>, map[string][]int>>(TandU>, map[string][]int>{ + t: TandU>{ + t: 20 + u: MultiLevel{ + foo: Empty{} + } + } + u: { + 'bar': [40] + } + }) }