From c5302bfcf5acf153708f6c3a98245a00601061aa Mon Sep 17 00:00:00 2001 From: Ned Palacios <7358345+nedpals@users.noreply.github.com> Date: Tue, 30 Mar 2021 15:40:20 +0800 Subject: [PATCH] x.json2: skip whitespace before scanning (#9508) --- vlib/x/json2/decoder.v | 3 +- vlib/x/json2/decoder_test.v | 11 ++++++++ vlib/x/json2/json2_test.v | 2 +- vlib/x/json2/scanner.v | 56 +++++++++++++++++++++---------------- vlib/x/json2/scanner_test.v | 21 ++++++++++++++ 5 files changed, 67 insertions(+), 26 deletions(-) diff --git a/vlib/x/json2/decoder.v b/vlib/x/json2/decoder.v index 5009b4f84c..bb9ac95f9f 100644 --- a/vlib/x/json2/decoder.v +++ b/vlib/x/json2/decoder.v @@ -4,7 +4,8 @@ module json2 // `Any` is a sum type that lists the possible types to be decoded and used. -pub type Any = Null | []Any | bool | f32 | f64 | i64 | u64 | int | map[string]Any | string +pub type Any = Null | []Any | bool | f32 | f64 | i64 | int | map[string]Any | string | + u64 // `Null` struct is a simple representation of the `null` value in JSON. pub struct Null { diff --git a/vlib/x/json2/decoder_test.v b/vlib/x/json2/decoder_test.v index 32e193ceec..9168c8e965 100644 --- a/vlib/x/json2/decoder_test.v +++ b/vlib/x/json2/decoder_test.v @@ -67,3 +67,14 @@ fn test_raw_decode_string_with_dollarsign() { } assert str.str() == r'Hello $world' } + +fn test_raw_decode_map_with_whitespaces() { + raw_mp := json2.raw_decode(' \n\t{"name":"Bob","age":20}\n\t') or { + eprintln(err.msg) + assert false + json2.Any{} + } + mp := raw_mp.as_map() + assert mp['name'].str() == 'Bob' + assert mp['age'].int() == 20 +} diff --git a/vlib/x/json2/json2_test.v b/vlib/x/json2/json2_test.v index e8893d2300..5df73f9c5d 100644 --- a/vlib/x/json2/json2_test.v +++ b/vlib/x/json2/json2_test.v @@ -124,7 +124,7 @@ pub mut: last_name string [json: lastName] is_registered bool [json: IsRegistered] typ int [json: 'type'] - pets string [raw; json: 'pet_animals'] + pets string [json: 'pet_animals'; raw] } fn (mut u User) from_json(an json2.Any) { diff --git a/vlib/x/json2/scanner.v b/vlib/x/json2/scanner.v index 82144753b0..f8adb70fd3 100644 --- a/vlib/x/json2/scanner.v +++ b/vlib/x/json2/scanner.v @@ -64,16 +64,16 @@ const ( ) // move_pos proceeds to the next position. -fn (mut s Scanner) move_pos() { - s.move(true, true) +fn (mut s Scanner) move() { + s.move_pos(true, true) } // move_pos_with_newlines is the same as move_pos but only enables newline checking. fn (mut s Scanner) move_pos_with_newlines() { - s.move(false, true) + s.move_pos(false, true) } -fn (mut s Scanner) move(include_space bool, include_newlines bool) { +fn (mut s Scanner) move_pos(include_space bool, include_newlines bool) { s.pos++ if s.pos < s.text.len { if include_newlines && s.text[s.pos] in json2.newlines { @@ -83,13 +83,13 @@ fn (mut s Scanner) move(include_space bool, include_newlines bool) { s.pos++ } for s.pos < s.text.len && s.text[s.pos] in json2.newlines { - s.move_pos() + s.move() } } else if include_space && s.text[s.pos] == ` ` { s.pos++ s.col++ for s.pos < s.text.len && s.text[s.pos] == ` ` { - s.move_pos() + s.move() } } } else { @@ -118,7 +118,8 @@ fn (mut s Scanner) text_scan() Token { mut has_closed := false mut chrs := []byte{} for { - s.move(false, false) + s.pos++ + s.col++ if s.pos >= s.text.len { break } @@ -135,15 +136,18 @@ fn (mut s Scanner) text_scan() Token { peek := s.text[s.pos + 1] if peek in json2.valid_unicode_escapes { chrs << json2.unicode_transform_escapes[int(peek)] - s.move(false, false) + s.pos++ + s.col++ continue } else if peek == `u` { if s.pos + 5 < s.text.len { - s.move(false, false) + s.pos++ + s.col++ mut codepoint := []byte{} codepoint_start := s.pos for s.pos < s.text.len && s.pos < codepoint_start + 4 { - s.move(false, false) + s.pos++ + s.col++ if s.text[s.pos] == `"` { break } else if !s.text[s.pos].is_hex_digit() { @@ -178,7 +182,7 @@ fn (mut s Scanner) text_scan() Token { chrs << ch } tok := s.tokenize(chrs, .str_) - s.move_pos() + s.move() if !has_closed { return s.error('missing double quotes in string closing') } @@ -236,14 +240,18 @@ fn (mut s Scanner) num_scan() Token { // invalid_token returns an error token with the invalid token message. fn (s Scanner) invalid_token() Token { - return s.error('invalid token `${s.text[s.pos].ascii_str()}`') + if s.text[s.pos] >= 32 && s.text[s.pos] <= 126 { + return s.error('invalid token `${s.text[s.pos].ascii_str()}`') + } else { + return s.error('invalid token ${s.text[s.pos].str_escaped()}') + } } // scan returns a token based on the scanner's current position. [manualfree] fn (mut s Scanner) scan() Token { - for s.pos < s.text.len && s.text[s.pos] == ` ` { - s.pos++ + if s.pos < s.text.len && (s.text[s.pos] == ` ` || s.text[s.pos] in json2.newlines) { + s.move() } if s.pos >= s.text.len { return s.tokenize([]byte{}, .eof) @@ -257,10 +265,10 @@ fn (mut s Scanner) scan() Token { unsafe { ident.free() } val := s.text[s.pos..s.pos + 4] tok := s.tokenize(val, kind) - s.move_pos() - s.move_pos() - s.move_pos() - s.move_pos() + s.move() // n / t + s.move() // u / r + s.move() // l / u + s.move() // l / e return tok } unsafe { ident.free() } @@ -271,11 +279,11 @@ fn (mut s Scanner) scan() Token { unsafe { ident.free() } val := s.text[s.pos..s.pos + 5] tok := s.tokenize(val, .bool_) - s.move_pos() - s.move_pos() - s.move_pos() - s.move_pos() - s.move_pos() + s.move() // f + s.move() // a + s.move() // l + s.move() // s + s.move() // e return tok } unsafe { ident.free() } @@ -283,7 +291,7 @@ fn (mut s Scanner) scan() Token { } else if s.text[s.pos] in json2.char_list { chr := s.text[s.pos] tok := s.tokenize([]byte{}, TokenKind(int(chr))) - s.move_pos() + s.move() return tok } else if s.text[s.pos] == `"` { return s.text_scan() diff --git a/vlib/x/json2/scanner_test.v b/vlib/x/json2/scanner_test.v index 935f3be8fc..73f4d797d7 100644 --- a/vlib/x/json2/scanner_test.v +++ b/vlib/x/json2/scanner_test.v @@ -328,3 +328,24 @@ fn test_bool_false() { assert tok.lit.len == 5 assert tok.lit.bytestr() == 'false' } + +fn test_json_with_whitespace_start() { + mut sc := Scanner{ + text: ' \n \n\t {'.bytes() + } + tok := sc.scan() + eprintln(tok) + assert tok.kind == .lcbr + assert tok.lit.len == 0 +} + +fn test_json_with_whitespace_end() { + mut sc := Scanner{ + text: '} \n\t'.bytes() + } + tok := sc.scan() + assert tok.kind == .rcbr + tok2 := sc.scan() + eprintln(tok2) + assert tok2.kind == .eof +}