x.json2: skip whitespace before scanning (#9508)

2023-08-10 21:13:21 +03:00 · 2021-03-30 15:40:20 +08:00
parent c4e389be41
commit c5302bfcf5
5 changed files with 67 additions and 26 deletions
--- a/vlib/x/json2/decoder.v
+++ b/vlib/x/json2/decoder.v
@@ -4,7 +4,8 @@
 module json2
 // `Any` is a sum type that lists the possible types to be decoded and used.
-pub type Any = Null | []Any | bool | f32 | f64 | i64 | u64 | int | map[string]Any | string
+pub type Any = Null | []Any | bool | f32 | f64 | i64 | int | map[string]Any | string |
 	u64
 // `Null` struct is a simple representation of the `null` value in JSON.
 pub struct Null {
--- a/vlib/x/json2/decoder_test.v
+++ b/vlib/x/json2/decoder_test.v
@@ -67,3 +67,14 @@ fn test_raw_decode_string_with_dollarsign() {
 	}
 	assert str.str() == r'Hello $world'
 }
 // test_raw_decode_map_with_whitespaces checks that raw_decode accepts a JSON
 // object surrounded by leading and trailing whitespace (space, newline, tab)
 // and that the decoded map still yields the expected `name` and `age` values.
 fn test_raw_decode_map_with_whitespaces() {
 	raw_mp := json2.raw_decode(' \n\t{"name":"Bob","age":20}\n\t') or {
 		// A decode error fails the test; the trailing json2.Any{} is only the
 		// value of the `or` block.
 		eprintln(err.msg)
 		assert false
 		json2.Any{}
 	}
 	mp := raw_mp.as_map()
 	assert mp['name'].str() == 'Bob'
 	assert mp['age'].int() == 20
 }
--- a/vlib/x/json2/json2_test.v
+++ b/vlib/x/json2/json2_test.v
@@ -124,7 +124,7 @@ pub mut:
 	last_name     string [json: lastName]
 	is_registered bool   [json: IsRegistered]
 	typ           int    [json: 'type']
-	pets          string [raw; json: 'pet_animals']
+	pets          string [json: 'pet_animals'; raw]
 }
 fn (mut u User) from_json(an json2.Any) {
--- a/vlib/x/json2/scanner.v
+++ b/vlib/x/json2/scanner.v
@@ -64,16 +64,16 @@ const (
 )
 // move proceeds to the next position. It calls move_pos with both
 // include_space and include_newlines enabled.
-fn (mut s Scanner) move_pos() {
+fn (mut s Scanner) move() {
-	s.move(true, true)
+	s.move_pos(true, true)
 }
 // move_pos_with_newlines is the same as move but only enables newline checking
 // (it calls move_pos with include_space=false and include_newlines=true).
 fn (mut s Scanner) move_pos_with_newlines() {
-	s.move(false, true)
+	s.move_pos(false, true)
 }
-fn (mut s Scanner) move(include_space bool, include_newlines bool) {
+fn (mut s Scanner) move_pos(include_space bool, include_newlines bool) {
 	s.pos++
 	if s.pos < s.text.len {
 		if include_newlines && s.text[s.pos] in json2.newlines {
@@ -83,13 +83,13 @@ fn (mut s Scanner) move(include_space bool, include_newlines bool) {
 				s.pos++
 			}
 			for s.pos < s.text.len && s.text[s.pos] in json2.newlines {
-				s.move_pos()
+				s.move()
 			}
 		} else if include_space && s.text[s.pos] == ` ` {
 			s.pos++
 			s.col++
 			for s.pos < s.text.len && s.text[s.pos] == ` ` {
-				s.move_pos()
+				s.move()
 			}
 		}
 	} else {
@@ -118,7 +118,8 @@ fn (mut s Scanner) text_scan() Token {
 	mut has_closed := false
 	mut chrs := []byte{}
 	for {
-		s.move(false, false)
+		s.pos++
 		s.col++
 		if s.pos >= s.text.len {
 			break
 		}
@@ -135,15 +136,18 @@ fn (mut s Scanner) text_scan() Token {
 			peek := s.text[s.pos + 1]
 			if peek in json2.valid_unicode_escapes {
 				chrs << json2.unicode_transform_escapes[int(peek)]
-				s.move(false, false)
+				s.pos++
 				s.col++
 				continue
 			} else if peek == `u` {
 				if s.pos + 5 < s.text.len {
-					s.move(false, false)
+					s.pos++
 					s.col++
 					mut codepoint := []byte{}
 					codepoint_start := s.pos
 					for s.pos < s.text.len && s.pos < codepoint_start + 4 {
-						s.move(false, false)
+						s.pos++
 						s.col++
 						if s.text[s.pos] == `"` {
 							break
 						} else if !s.text[s.pos].is_hex_digit() {
@@ -178,7 +182,7 @@ fn (mut s Scanner) text_scan() Token {
 		chrs << ch
 	}
 	tok := s.tokenize(chrs, .str_)
-	s.move_pos()
+	s.move()
 	if !has_closed {
 		return s.error('missing double quotes in string closing')
 	}
@@ -236,14 +240,18 @@ fn (mut s Scanner) num_scan() Token {
 // invalid_token returns an error token with the invalid token message.
 // A byte in the printable ASCII range (32..126) is shown as-is between
 // backticks; any other byte is reported through str_escaped() instead,
 // presumably so control characters do not garble the message.
 fn (s Scanner) invalid_token() Token {
-	return s.error('invalid token `${s.text[s.pos].ascii_str()}`')
+	if s.text[s.pos] >= 32 && s.text[s.pos] <= 126 {
 		return s.error('invalid token `${s.text[s.pos].ascii_str()}`')
 	} else {
 		return s.error('invalid token ${s.text[s.pos].str_escaped()}')
 	}
 }
 // scan returns a token based on the scanner's current position.
 [manualfree]
 fn (mut s Scanner) scan() Token {
-	for s.pos < s.text.len && s.text[s.pos] == ` ` {
+	if s.pos < s.text.len && (s.text[s.pos] == ` ` || s.text[s.pos] in json2.newlines) {
-		s.pos++
+		s.move()
 	}
 	if s.pos >= s.text.len {
 		return s.tokenize([]byte{}, .eof)
@@ -257,10 +265,10 @@ fn (mut s Scanner) scan() Token {
 			unsafe { ident.free() }
 			val := s.text[s.pos..s.pos + 4]
 			tok := s.tokenize(val, kind)
-			s.move_pos()
+			s.move() // n / t
-			s.move_pos()
+			s.move() // u / r
-			s.move_pos()
+			s.move() // l / u
-			s.move_pos()
+			s.move() // l / e
 			return tok
 		}
 		unsafe { ident.free() }
@@ -271,11 +279,11 @@ fn (mut s Scanner) scan() Token {
 			unsafe { ident.free() }
 			val := s.text[s.pos..s.pos + 5]
 			tok := s.tokenize(val, .bool_)
-			s.move_pos()
+			s.move() // f
-			s.move_pos()
+			s.move() // a
-			s.move_pos()
+			s.move() // l
-			s.move_pos()
+			s.move() // s
-			s.move_pos()
+			s.move() // e
 			return tok
 		}
 		unsafe { ident.free() }
@@ -283,7 +291,7 @@ fn (mut s Scanner) scan() Token {
 	} else if s.text[s.pos] in json2.char_list {
 		chr := s.text[s.pos]
 		tok := s.tokenize([]byte{}, TokenKind(int(chr)))
-		s.move_pos()
+		s.move()
 		return tok
 	} else if s.text[s.pos] == `"` {
 		return s.text_scan()
--- a/vlib/x/json2/scanner_test.v
+++ b/vlib/x/json2/scanner_test.v
@@ -328,3 +328,24 @@ fn test_bool_false() {
 	assert tok.lit.len == 5
 	assert tok.lit.bytestr() == 'false'
 }
 // test_json_with_whitespace_start checks that the first scan() call on input
 // that begins with spaces, newlines and a tab returns the `{` token (.lcbr)
 // with an empty literal.
 fn test_json_with_whitespace_start() {
 	mut sc := Scanner{
 		text: ' \n  \n\t {'.bytes()
 	}
 	tok := sc.scan()
 	// Prints the scanned token to stderr.
 	eprintln(tok)
 	assert tok.kind == .lcbr
 	assert tok.lit.len == 0
 }
 // test_json_with_whitespace_end checks that, for input ending in spaces, a
 // newline and a tab, scan() first returns the `}` token (.rcbr) and the next
 // scan() returns .eof.
 fn test_json_with_whitespace_end() {
 	mut sc := Scanner{
 		text: '}  \n\t'.bytes()
 	}
 	tok := sc.scan()
 	assert tok.kind == .rcbr
 	tok2 := sc.scan()
 	// Prints the second token to stderr.
 	eprintln(tok2)
 	assert tok2.kind == .eof
 }