x.json2: add customized JSON output capability via Encoder (#13654)

2023-08-10 21:13:21 +03:00 · 2022-03-04 19:39:23 +08:00
parent 74d5106e8f
commit 437fa02f27
6 changed files with 275 additions and 168 deletions
--- a/vlib/toml/tests/alexcrichton.toml-rs-tests_test.v
+++ b/vlib/toml/tests/alexcrichton.toml-rs-tests_test.v
@@ -236,13 +236,13 @@ fn to_alexcrichton(value ast.Value, array_type int) string {
 	match value {
 		ast.Quoted {
 			json_text := json2.Any(value.text).json_str()
-			return '{ "type": "string", "value": "$json_text" }'
+			return '{ "type": "string", "value": $json_text }'
 		}
 		ast.DateTime {
 			// Normalization for json
 			mut json_text := json2.Any(value.text).json_str().to_upper().replace(' ',
 				'T')
-			typ := if json_text.ends_with('Z') || json_text.all_after('T').contains('-')
+			typ := if json_text.ends_with('Z"') || json_text.all_after('T').contains('-')
 				|| json_text.all_after('T').contains('+') {
 				'datetime'
 			} else {
@@ -252,16 +252,16 @@ fn to_alexcrichton(value ast.Value, array_type int) string {
 			// It seems it's implementation specific how time and
 			// date-time values are represented in detail. For now we follow the BurntSushi format
 			// that expands to 6 digits which is also a valid RFC 3339 representation.
-			json_text = to_alexcrichton_time(json_text)
+			json_text = to_alexcrichton_time(json_text[1..json_text.len - 1])
 			return '{ "type": "$typ", "value": "$json_text" }'
 		}
 		ast.Date {
 			json_text := json2.Any(value.text).json_str()
-			return '{ "type": "date", "value": "$json_text" }'
+			return '{ "type": "date", "value": $json_text }'
 		}
 		ast.Time {
 			mut json_text := json2.Any(value.text).json_str()
-			json_text = to_alexcrichton_time(json_text)
+			json_text = to_alexcrichton_time(json_text[1..json_text.len - 1])
 			return '{ "type": "time", "value": "$json_text" }'
 		}
 		ast.Bool {
@@ -270,12 +270,12 @@ fn to_alexcrichton(value ast.Value, array_type int) string {
 		}
 		ast.Null {
 			json_text := json2.Any(value.text).json_str()
-			return '{ "type": "null", "value": "$json_text" }'
+			return '{ "type": "null", "value": $json_text }'
 		}
 		ast.Number {
 			text := value.text
 			if text.contains('inf') || text.contains('nan') {
-				return '{ "type": "float", "value": "$value.text" }'
+				return '{ "type": "float", "value": $value.text }'
 			}
 			if !text.starts_with('0x') && (text.contains('.') || text.to_lower().contains('e')) {
 				mut val := ''
@@ -297,7 +297,7 @@ fn to_alexcrichton(value ast.Value, array_type int) string {
 			mut str := '{ '
 			for key, val in value {
 				json_key := json2.Any(key).json_str()
-				str += ' "$json_key": ${to_alexcrichton(val, array_type)},'
+				str += ' $json_key: ${to_alexcrichton(val, array_type)},'
 			}
 			str = str.trim_right(',')
 			str += ' }'
--- a/vlib/toml/tests/burntsushi.toml-test_test.v
+++ b/vlib/toml/tests/burntsushi.toml-test_test.v
@@ -199,26 +199,30 @@ fn to_burntsushi(value ast.Value) string {
 	match value {
 		ast.Quoted {
 			json_text := json2.Any(value.text).json_str()
-			return '{ "type": "string", "value": "$json_text" }'
+			return '{ "type": "string", "value": $json_text }'
 		}
 		ast.DateTime {
 			// Normalization for json
 			json_text := json2.Any(value.text).json_str().to_upper().replace(' ', 'T')
-			typ := if json_text.ends_with('Z') || json_text.all_after('T').contains('-')
+
+			// NB: Since encoding strings in JSON now automatically includes quotes,
+			// the check below also matches the closing quote (`Z"`) as a workaround,
+			// so that the date time type is still recognized properly. - Ned
+			typ := if json_text.ends_with('Z"') || json_text.all_after('T').contains('-')
 				|| json_text.all_after('T').contains('+') {
 				'datetime'
 			} else {
 				'datetime-local'
 			}
-			return '{ "type": "$typ", "value": "$json_text" }'
+			return '{ "type": "$typ", "value": $json_text }'
 		}
 		ast.Date {
 			json_text := json2.Any(value.text).json_str()
-			return '{ "type": "date-local", "value": "$json_text" }'
+			return '{ "type": "date-local", "value": $json_text }'
 		}
 		ast.Time {
 			json_text := json2.Any(value.text).json_str()
-			return '{ "type": "time-local", "value": "$json_text" }'
+			return '{ "type": "time-local", "value": $json_text }'
 		}
 		ast.Bool {
 			json_text := json2.Any(value.text.bool()).json_str()
@@ -226,7 +230,7 @@ fn to_burntsushi(value ast.Value) string {
 		}
 		ast.Null {
 			json_text := json2.Any(value.text).json_str()
-			return '{ "type": "null", "value": "$json_text" }'
+			return '{ "type": "null", "value": $json_text }'
 		}
 		ast.Number {
 			if value.text.contains('inf') || value.text.contains('nan') {
@@ -251,7 +255,7 @@ fn to_burntsushi(value ast.Value) string {
 			mut str := '{ '
 			for key, val in value {
 				json_key := json2.Any(key).json_str()
-				str += ' "$json_key": ${to_burntsushi(val)},'
+				str += ' $json_key: ${to_burntsushi(val)},'
 			}
 			str = str.trim_right(',')
 			str += ' }'
--- a/vlib/toml/tests/iarna.toml-spec-tests_test.v
+++ b/vlib/toml/tests/iarna.toml-spec-tests_test.v
@@ -288,15 +288,15 @@ fn to_iarna(value ast.Value, skip_value_map bool) string {
 		ast.Quoted {
 			json_text := json2.Any(value.text).json_str()
 			if skip_value_map {
-				return '"$json_text"'
+				return json_text
 			}
-			return '{ "type": "string", "value": "$json_text" }'
+			return '{ "type": "string", "value": $json_text }'
 		}
 		ast.DateTime {
 			// Normalization for json
 			mut json_text := json2.Any(value.text).json_str().to_upper().replace(' ',
 				'T')
-			typ := if json_text.ends_with('Z') || json_text.all_after('T').contains('-')
+			typ := if json_text.ends_with('Z"') || json_text.all_after('T').contains('-')
 				|| json_text.all_after('T').contains('+') {
 				'datetime'
 			} else {
@@ -306,40 +306,41 @@ fn to_iarna(value ast.Value, skip_value_map bool) string {
 			// It seems it's implementation specific how time and
 			// date-time values are represented in detail. For now we follow the BurntSushi format
 			// that expands to 6 digits which is also a valid RFC 3339 representation.
-			json_text = to_iarna_time(json_text)
+			json_text = to_iarna_time(json_text[1..json_text.len - 1])
 			if skip_value_map {
-				return '"$json_text"'
+				return json_text
 			}
 			return '{ "type": "$typ", "value": "$json_text" }'
 		}
 		ast.Date {
 			json_text := json2.Any(value.text).json_str()
 			if skip_value_map {
-				return '"$json_text"'
+				return json_text
 			}
-			return '{ "type": "date", "value": "$json_text" }'
+			return '{ "type": "date", "value": $json_text }'
 		}
 		ast.Time {
 			mut json_text := json2.Any(value.text).json_str()
-			json_text = to_iarna_time(json_text)
+			// NB: Removes the quotes of the encoded JSON string - Ned
+			json_text = to_iarna_time(json_text[1..json_text.len - 1])
 			if skip_value_map {
-				return '"$json_text"'
+				return json_text
 			}
 			return '{ "type": "time", "value": "$json_text" }'
 		}
 		ast.Bool {
 			json_text := json2.Any(value.text.bool()).json_str()
 			if skip_value_map {
-				return '$json_text'
+				return json_text
 			}
 			return '{ "type": "bool", "value": "$json_text" }'
 		}
 		ast.Null {
 			json_text := json2.Any(value.text).json_str()
 			if skip_value_map {
-				return '$json_text'
+				return json_text
 			}
-			return '{ "type": "null", "value": "$json_text" }'
+			return '{ "type": "null", "value": $json_text }'
 		}
 		ast.Number {
 			if value.text.contains('inf') {
@@ -384,7 +385,7 @@ fn to_iarna(value ast.Value, skip_value_map bool) string {
 			mut str := '{ '
 			for key, val in value {
 				json_key := json2.Any(key).json_str()
-				str += ' "$json_key": ${to_iarna(val, skip_value_map)},'
+				str += ' $json_key: ${to_iarna(val, skip_value_map)},'
 			}
 			str = str.trim_right(',')
 			str += ' }'
--- a/vlib/toml/to/to.v
+++ b/vlib/toml/to/to.v
@@ -27,19 +27,16 @@ fn any_to_json(a toml.Any) string {
 			return 'null'
 		}
 		toml.DateTime {
-			json_text := json2.Any(a.str())
-			return '"$json_text.json_str()"'
+			return json2.Any(a.str()).json_str()
 		}
 		toml.Date {
-			json_text := json2.Any(a.str())
-			return '"$json_text.json_str()"'
+			return json2.Any(a.str()).json_str()
 		}
 		toml.Time {
-			json_text := json2.Any(a.str())
-			return '"$json_text.json_str()"'
+			return json2.Any(a.str()).json_str()
 		}
 		string {
-			return '"' + json2.Any(a.str()).json_str() + '"'
+			return json2.Any(a.str()).json_str()
 		}
 		bool {
 			return json2.Any(bool(a)).json_str()
@@ -63,7 +60,7 @@ fn any_to_json(a toml.Any) string {
 			mut str := '{'
 			for key, val in a {
 				json_key := json2.Any(key)
-				str += ' "$json_key.json_str()": ${any_to_json(val)},'
+				str += ' $json_key.json_str(): ${any_to_json(val)},'
 			}
 			str = str.trim_right(',')
 			str += ' }'
--- a/vlib/x/json2/encoder.v
+++ b/vlib/x/json2/encoder.v
@@ -3,54 +3,125 @@
 // that can be found in the LICENSE file.
 module json2

+import io
 import strings

-fn write_value(v Any, i int, len int, mut wr strings.Builder) {
-	str := v.json_str()
-	if v is string {
-		wr.write_string('"$str"')
-	} else {
-		wr.write_string(str)
-	}
-	if i >= len - 1 {
-		return
-	}
-	wr.write_byte(`,`)
+// Encoder encodes an `Any` value into its JSON representation.
+// It provides parameters in order to change the end result.
+pub struct Encoder {
+	newline              byte
+	newline_spaces_count int
+	escape_unicode       bool = true
 }

-// str returns the string representation of the `map[string]Any`.
-[manualfree]
-pub fn (flds map[string]Any) str() string {
-	mut wr := strings.new_builder(200)
-	wr.write_byte(`{`)
-	mut i := 0
-	for k, v in flds {
-		wr.write_string('"$k":')
-		write_value(v, i, flds.len, mut wr)
-		i++
-	}
-	wr.write_byte(`}`)
-	defer {
-		unsafe { wr.free() }
-	}
-	res := wr.str()
-	return res
+// byte array versions of the most common tokens/chars
+// to avoid reallocations
+const null_in_bytes = 'null'.bytes()
+
+const true_in_bytes = 'true'.bytes()
+
+const false_in_bytes = 'false'.bytes()
+
+const zero_in_bytes = [byte(`0`)]
+
+const comma_bytes = [byte(`,`)]
+
+const colon_bytes = [byte(`:`)]
+
+const space_bytes = [byte(` `)]
+
+const unicode_escape_chars = [byte(`\\`), `u`]
+
+const quote_bytes = [byte(`"`)]
+
+const escaped_chars = [(r'\b').bytes(), (r'\f').bytes(), (r'\n').bytes(),
+	(r'\r').bytes(), (r'\t').bytes()]
+
+// encode_value encodes an `Any` value and writes it to the given writer.
+pub fn (e &Encoder) encode_value(f Any, mut wr io.Writer) ? {
+	e.encode_value_with_level(f, 1, mut wr) ?
 }

-// str returns the string representation of the `[]Any`.
-[manualfree]
-pub fn (flds []Any) str() string {
-	mut wr := strings.new_builder(200)
-	wr.write_byte(`[`)
-	for i, v in flds {
-		write_value(v, i, flds.len, mut wr)
+fn (e &Encoder) encode_newline(level int, mut wr io.Writer) ? {
+	if e.newline != 0 {
+		wr.write([e.newline]) ?
+		for j := 0; j < level * e.newline_spaces_count; j++ {
+			wr.write(json2.space_bytes) ?
+		}
 	}
-	wr.write_byte(`]`)
-	defer {
-		unsafe { wr.free() }
+}
+
+fn (e &Encoder) encode_value_with_level(f Any, level int, mut wr io.Writer) ? {
+	match f {
+		string {
+			e.encode_string(f, mut wr) ?
+		}
+		bool {
+			if f == true {
+				wr.write(json2.true_in_bytes) ?
+			} else {
+				wr.write(json2.false_in_bytes) ?
+			}
+		}
+		int, u64, i64 {
+			wr.write(f.str().bytes()) ?
+		}
+		f32, f64 {
+			$if !nofloat ? {
+				str_float := f.str().bytes()
+				wr.write(str_float) ?
+				if str_float[str_float.len - 1] == `.` {
+					wr.write(json2.zero_in_bytes) ?
+				}
+				return
+			}
+			wr.write(json2.zero_in_bytes) ?
+		}
+		map[string]Any {
+			wr.write([byte(`{`)]) ?
+			mut i := 0
+			for k, v in f {
+				e.encode_newline(level, mut wr) ?
+				e.encode_string(k, mut wr) ?
+				wr.write(json2.colon_bytes) ?
+				if e.newline != 0 {
+					wr.write(json2.space_bytes) ?
+				}
+				e.encode_value_with_level(v, level + 1, mut wr) ?
+				if i < f.len - 1 {
+					wr.write(json2.comma_bytes) ?
+				}
+				i++
+			}
+			e.encode_newline(level - 1, mut wr) ?
+			wr.write([byte(`}`)]) ?
+		}
+		[]Any {
+			wr.write([byte(`[`)]) ?
+			for i, v in f {
+				e.encode_newline(level, mut wr) ?
+				e.encode_value_with_level(v, level + 1, mut wr) ?
+				if i < f.len - 1 {
+					wr.write(json2.comma_bytes) ?
+				}
+			}
+			e.encode_newline(level - 1, mut wr) ?
+			wr.write([byte(`]`)]) ?
+		}
+		Null {
+			wr.write(json2.null_in_bytes) ?
+		}
 	}
-	res := wr.str()
-	return res
+}
+
+// str returns the JSON string representation of the `map[string]Any` type.
+pub fn (f map[string]Any) str() string {
+	return Any(f).json_str()
+}
+
+// str returns the JSON string representation of the `[]Any` type.
+pub fn (f []Any) str() string {
+	return Any(f).json_str()
 }

 // str returns the string representation of the `Any` type. Use the `json_str` method
@@ -64,113 +135,102 @@ pub fn (f Any) str() string {
 }

 // json_str returns the JSON string representation of the `Any` type.
-pub fn (f Any) json_str() string {
-	match f {
-		string {
-			return json_string(f)
-		}
-		bool, int, u64, i64 {
-			return f.str()
-		}
-		f32 {
-			$if !nofloat ? {
-				str_f32 := f.str()
-				if str_f32.ends_with('.') {
-					return '${str_f32}0'
-				}
-				return str_f32
-			}
-
-			return '0'
-		}
-		f64 {
-			$if !nofloat ? {
-				str_f64 := f.str()
-				if str_f64.ends_with('.') {
-					return '${str_f64}0'
-				}
-				return str_f64
-			}
-			return '0'
-		}
-		map[string]Any {
-			return f.str()
-		}
-		[]Any {
-			return f.str()
-		}
-		Null {
-			return 'null'
-		}
-	}
-}
-
-// char_len_list is a modified version of builtin.utf8_str_len
-// that returns an array of character lengths. (e.g "t✔" => [1,2])
-fn char_len_list(s string) []int {
-	mut l := 1
-	mut ls := []int{}
-	for i := 0; i < s.len; i++ {
-		c := s[i]
-		if (c & (1 << 7)) != 0 {
-			for t := byte(1 << 6); (c & t) != 0; t >>= 1 {
-				l++
-				i++
-			}
-		}
-		ls << l
-		l = 1
-	}
-	return ls
-}
-
-const escaped_chars = [r'\b', r'\f', r'\n', r'\r', r'\t']
-
-// json_string returns the JSON spec-compliant version of the string.
 [manualfree]
-fn json_string(s string) string {
-	// not the best implementation but will revisit it soon
-	char_lens := char_len_list(s)
-	mut sb := strings.new_builder(s.len)
-	mut i := 0
+pub fn (f Any) json_str() string {
+	mut sb := strings.new_builder(4096)
 	defer {
-		unsafe {
-			char_lens.free()
-			// freeing string builder on defer after
-			// returning .str() still isn't working :(
-			// sb.free()
+		unsafe { sb.free() }
+	}
+	mut enc := Encoder{}
+	enc.encode_value(f, mut sb) or { return '' }
+	return sb.str()
+}
+
+// prettify_json_str returns the pretty-formatted JSON string representation of the `Any` type.
+[manualfree]
+pub fn (f Any) prettify_json_str() string {
+	mut sb := strings.new_builder(4096)
+	defer {
+		unsafe { sb.free() }
+	}
+	mut enc := Encoder{
+		newline: `\n`
+		newline_spaces_count: 4
+	}
+	enc.encode_value(f, mut sb) or { return '' }
+	return sb.str()
+}
+
+// CharLengthIterator is an iterator that yields, on every iteration,
+// the length in bytes of the next UTF-8 character of the given text.
+// (e.g.: "t✔" => [t => 1, ✔ => 3])
+struct CharLengthIterator {
+	text string
+mut:
+	idx int
+}
+
+fn (mut iter CharLengthIterator) next() ?int {
+	if iter.idx >= iter.text.len {
+		return none
+	}
+	defer {
+		iter.idx++
+	}
+	mut len := 1
+	c := iter.text[iter.idx]
+	if (c & (1 << 7)) != 0 {
+		for t := byte(1 << 6); (c & t) != 0; t >>= 1 {
+			len++
+			iter.idx++
 		}
 	}
+	return len
+}
+
+// encode_string writes the quoted, JSON spec-compliant version of the string to the writer.
+[manualfree]
+fn (e &Encoder) encode_string(s string, mut wr io.Writer) ? {
+	mut char_lens := CharLengthIterator{
+		text: s
+	}
+	mut i := 0
+	wr.write(json2.quote_bytes) ?
 	for char_len in char_lens {
 		if char_len == 1 {
 			chr := s[i]
 			if chr in important_escapable_chars {
 				for j := 0; j < important_escapable_chars.len; j++ {
 					if chr == important_escapable_chars[j] {
-						sb.write_string(json2.escaped_chars[j])
+						wr.write(json2.escaped_chars[j]) ?
 						break
 					}
 				}
 			} else if chr == `"` || chr == `/` || chr == `\\` {
-				sb.write_string('\\' + chr.ascii_str())
+				wr.write([byte(`\\`), chr]) ?
 			} else if int(chr) < 0x20 {
-				hex_code := chr.hex()
-				sb.write_string('\\u00$hex_code')
+				hex_code := chr.hex().bytes()
+				wr.write(json2.unicode_escape_chars) ? // \u
+				wr.write(json2.zero_in_bytes) ? // \u0
+				wr.write(json2.zero_in_bytes) ? // \u00
+				wr.write(hex_code) ? // \u00xx
 			} else {
-				sb.write_byte(chr)
+				wr.write([byte(chr)]) ?
 			}
 		} else {
 			slice := s[i..i + char_len]
-			hex_code := slice.utf32_code().hex()
-			if hex_code.len < 4 {
-				// an utf8 codepoint
-				sb.write_string(slice)
+			hex_code := slice.utf32_code().hex().bytes()
+			if !e.escape_unicode || hex_code.len < 4 {
+				// unescaped non-ASCII char
+				wr.write(slice.bytes()) ?
 			} else if hex_code.len == 4 {
-				sb.write_string('\\u$hex_code')
+				// a code point that fits in a single \uXXXX escape
+				wr.write(json2.unicode_escape_chars) ?
+				wr.write(hex_code) ?
 			} else {
 				// TODO: still figuring out what
 				// to do with more than 4 chars
-				sb.write_byte(` `)
+				wr.write(json2.space_bytes) ?
 			}
 			unsafe {
 				slice.free()
@@ -179,7 +239,6 @@ fn json_string(s string) string {
 		}
 		i += char_len
 	}
-	str := sb.str()
-	unsafe { sb.free() }
-	return str
+
+	wr.write(json2.quote_bytes) ?
 }
--- a/vlib/x/json2/encoder_test.v
+++ b/vlib/x/json2/encoder_test.v
@@ -1,20 +1,21 @@
 import x.json2
+import strings

 fn test_json_string_characters() {
 	text := json2.raw_decode(r'"\n\r\b\f\t\\\"\/"') or { '' }
-	assert text.json_str() == '\\n\\r\\b\\f\\t\\\\\\"\\/'
+	assert text.json_str() == '"\\n\\r\\b\\f\\t\\\\\\"\\/"'
 }

 fn test_json_escape_low_chars() {
 	esc := '\u001b'
 	assert esc.len == 1
 	text := json2.Any(esc)
-	assert text.json_str() == r'\u001b'
+	assert text.json_str() == r'"\u001b"'
 }

 fn test_json_string() {
 	text := json2.Any('te✔st')
-	assert text.json_str() == r'te\u2714st'
+	assert text.json_str() == r'"te\u2714st"'
 	boolean := json2.Any(true)
 	assert boolean.json_str() == 'true'
 	integer := json2.Any(int(-5))
@@ -27,12 +28,12 @@ fn test_json_string() {

 fn test_json_string_emoji() {
 	text := json2.Any('🐈')
-	assert text.json_str() == r' '
+	assert text.json_str() == r'" "'
 }

 fn test_json_string_non_ascii() {
 	text := json2.Any('ひらがな')
-	assert text.json_str() == r'\u3072\u3089\u304c\u306a'
+	assert text.json_str() == r'"\u3072\u3089\u304c\u306a"'
 }

 fn test_utf8_strings_are_not_modified() ? {
@@ -42,3 +43,48 @@ fn test_utf8_strings_are_not_modified() ? {
 	// dump(deresult)
 	assert deresult.str() == original
 }
+
+fn test_encoder_unescaped_utf32() ? {
+	jap_text := json2.Any('ひらがな')
+	enc := json2.Encoder{
+		escape_unicode: false
+	}
+
+	mut sb := strings.new_builder(20)
+	enc.encode_value(jap_text, mut sb) ?
+
+	assert sb.str() == '"$jap_text"'
+	sb.go_back_to(0)
+
+	emoji_text := json2.Any('🐈')
+	enc.encode_value(emoji_text, mut sb) ?
+	assert sb.str() == '"$emoji_text"'
+}
+
+fn test_encoder_prettify() ? {
+	obj := {
+		'hello': json2.Any('world')
+		'arr':   [json2.Any('im a string'), [json2.Any('3rd level')]]
+		'obj':   {
+			'map': json2.Any('map inside a map')
+		}
+	}
+	enc := json2.Encoder{
+		newline: `\n`
+		newline_spaces_count: 2
+	}
+	mut sb := strings.new_builder(20)
+	enc.encode_value(obj, mut sb) ?
+	assert sb.str() == '{
+  "hello": "world",
+  "arr": [
+    "im a string",
+    [
+      "3rd level"
+    ]
+  ],
+  "obj": {
+    "map": "map inside a map"
+  }
+}'
+}