2021-01-18 15:20:06 +03:00
|
|
|
// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved.
|
2020-09-10 13:05:40 +03:00
|
|
|
// Use of this source code is governed by an MIT license
|
|
|
|
// that can be found in the LICENSE file.
|
|
|
|
module json2
|
|
|
|
|
|
|
|
import strings
|
|
|
|
import strconv
|
|
|
|
import v.scanner
|
|
|
|
import v.token
|
|
|
|
import v.util
|
|
|
|
import v.pref
|
|
|
|
|
|
|
|
// Any is the sum type returned by the decoder. Its variants cover JSON strings,
// numbers (`int`, `i64`, `f32`, `f64`), booleans, `null` (as `Null`), arrays
// (`[]Any`) and objects (`map[string]Any`).
pub type Any = string | int | i64 | f32 | f64 | bool | Null | []Any | map[string]Any
|
2020-11-29 16:54:45 +03:00
|
|
|
|
2020-09-10 13:05:40 +03:00
|
|
|
// Null is a field-less marker struct that stands in for the JSON `null`
// literal inside the `Any` sum type.
pub struct Null {
}
|
2020-09-10 13:05:40 +03:00
|
|
|
|
|
|
|
// ParseMode names the kind of top-level value found in the input.
// `invalid` is the parser's default and means no supported value was recognised.
enum ParseMode {
	array
	bool
	invalid
	null
	number
	object
	string
}
|
|
|
|
|
|
|
|
// Error messages shared by several decoding routines.
const (
	// returned when a form feed byte (0x0c) is detected by `is_formfeed`
	formfeed_err = 'formfeed not allowed.'
	// returned when the input ends inside an array or object
	eof_err      = 'reached eof. data not closed properly.'
)
|
|
|
|
|
|
|
|
// Parser holds the decoding state: the token source plus a three token
// window (previous, current, lookahead) that `next` shifts forward.
struct Parser {
mut:
	// token source for the JSON text
	scanner      &scanner.Scanner
	// previous token
	p_tok        token.Token
	// current token
	tok          token.Token
	// lookahead token
	n_tok        token.Token
	// kind of the top-level value, filled in by `detect_parse_mode`
	mode         ParseMode = .invalid
	// nesting counter, compared against the limit of 500 in `decode_value`
	n_level      int
	// when false, booleans, null and numbers are returned as their literal text
	convert_type bool = true
}
|
|
|
|
|
|
|
|
// next shifts the token window one step forward: the current token becomes
// the previous one, the lookahead becomes current, and a freshly scanned
// token becomes the new lookahead.
fn (mut p Parser) next() {
	upcoming := p.scanner.scan()
	p.p_tok = p.tok
	p.tok = p.n_tok
	p.n_tok = upcoming
}
|
|
|
|
|
|
|
|
// emit_error formats `msg` as `[json] <msg> (<line>:<column>)`.
// The line is the current token's `line_nr`; the column is derived from the
// distance between the token and the closest preceding `\r` or `\n`.
fn (p Parser) emit_error(msg string) string {
	text := p.scanner.text
	tok := p.tok
	// clamp the token offset into the text, then walk left until a line break
	// is found or the index drops below zero
	mut brk := util.imax(0, util.imin(text.len - 1, tok.pos))
	if text.len > 0 {
		for brk >= 0 && text[brk] != `\r` && text[brk] != `\n` {
			brk--
		}
	}
	line := tok.line_nr
	column := util.imax(0, tok.pos - brk + tok.len - 1)
	return '[json] $msg ($line:$column)'
}
|
|
|
|
|
2020-10-09 17:11:55 +03:00
|
|
|
// new_parser creates a Parser for the JSON text `srce`.
// A leading UTF-8 byte order mark (EF BB BF) is dropped before scanning.
// `convert_type` is stored on the parser; when false, scalars are later
// returned as their literal text instead of typed values.
fn new_parser(srce string, convert_type bool) Parser {
	mut src := srce
	// Slice the BOM off with a bounds-checked substring. This keeps the real
	// length of the input (no C strlen scan that would stop at an embedded
	// NUL byte) and needs no unsafe pointer handling.
	if src.len >= 3 && src[0] == 0xEF && src[1] == 0xBB && src[2] == 0xBF {
		src = src[3..]
	}
	return Parser{
		scanner: scanner.new_scanner(src, .parse_comments, &pref.Preferences{})
		convert_type: convert_type
	}
}
|
|
|
|
|
2020-10-09 17:11:55 +03:00
|
|
|
// check_valid_hex verifies that `str` is exactly four hexadecimal digits.
// It returns an error when the length differs from 4 or when any byte is
// not a hex digit; otherwise it returns nothing.
fn check_valid_hex(str string) ? {
	if str.len != 4 {
		return error('hex string must be 4 characters.')
	}
	for idx := 0; idx < str.len; idx++ {
		if !str[idx].is_hex_digit() {
			return error('provided string is not a hex digit.')
		}
	}
}
|
2020-09-10 13:05:40 +03:00
|
|
|
|
2020-10-09 17:11:55 +03:00
|
|
|
// decode parses the whole input and returns its top-level value.
// Errors are wrapped by `emit_error` so they carry a line and column. An
// error is returned when no supported top-level value is detected, when the
// value itself fails to decode, or when tokens remain after the value.
fn (mut p Parser) decode() ?Any {
	p.detect_parse_mode()
	if p.mode == .invalid {
		return error(p.emit_error('invalid JSON.'))
	}
	root := p.decode_value() or { return error(p.emit_error(err)) }
	// the value must be followed directly by the end of the input
	if p.tok.kind == .eof {
		return root
	}
	return error(p.emit_error('unknown token `$p.tok.kind`.'))
}
|
|
|
|
|
|
|
|
// is_formfeed reports whether the byte two positions before the end of the
// previous token is a form feed (0x0c).
// The index is checked on both sides: with a default previous token
// (pos 0, len 0) it evaluates to -2, which must not be used to index the text.
fn (p Parser) is_formfeed() bool {
	text := p.scanner.text
	idx := p.p_tok.pos + p.p_tok.len - 2
	return idx >= 0 && idx < text.len && text[idx] == 0x0c
}
|
|
|
|
|
|
|
|
// is_singlequote reports whether the byte located right after the previous
// token in the scanned text is a single quote.
fn (p Parser) is_singlequote() bool {
	after_prev := p.p_tok.pos + p.p_tok.len
	return p.scanner.text[after_prev] == `\'`
}
|
|
|
|
|
|
|
|
// detect_parse_mode primes the current and lookahead tokens and stores the
// kind of the top-level value in `p.mode`. The mode is left untouched (its
// default is `.invalid`) when the first token does not start a supported value.
fn (mut p Parser) detect_parse_mode() {
	src := p.scanner.text
	// a digit immediately followed by a non-digit is rejected before scanning
	// NOTE(review): this also rejects inputs such as `1.5` - confirm that is intended
	lone_leading_digit := src.len > 1 && src[0].is_digit() && !src[1].is_digit()
	if lone_leading_digit {
		p.mode = .invalid
		return
	}
	p.tok = p.scanner.scan()
	p.n_tok = p.scanner.scan()
	// a one byte input that scans as a string token followed by eof is rejected
	if src.len == 1 && p.tok.kind == .string && p.n_tok.kind == .eof {
		p.mode = .invalid
		return
	}
	kind := p.tok.kind
	if kind == .lcbr {
		p.mode = .object
	} else if kind == .lsbr {
		p.mode = .array
	} else if kind == .number || (kind == .minus && p.n_tok.kind == .number) {
		p.mode = .number
	} else if kind in [.key_true, .key_false] {
		p.mode = .bool
	} else if kind == .string {
		p.mode = .string
	} else if kind == .name && p.tok.lit == 'null' {
		p.mode = .null
	}
}
|
|
|
|
|
|
|
|
// decode_value decodes the value that starts at the current token and
// dispatches to the array, object, number or string decoder as needed.
// With `convert_type` disabled, `true`, `false` and `null` are returned as
// their literal text. An error is returned when the nesting counter reaches
// 500, for single-quoted strings, unknown identifiers and unexpected tokens.
fn (mut p Parser) decode_value() ?Any {
	if p.n_level == 500 {
		return error('reached maximum nesting level of 500.')
	}
	// the counter only grows for `[` followed by `{` and for `[` directly after `[`
	if p.tok.kind == .lsbr && (p.n_tok.kind == .lcbr || p.p_tok.kind == .lsbr) {
		p.n_level++
	}
	match p.tok.kind {
		.lsbr {
			return p.decode_array()
		}
		.lcbr {
			return p.decode_object()
		}
		.number {
			return p.decode_number()
		}
		.key_true {
			p.next()
			if p.convert_type {
				return Any(true)
			}
			return Any('true')
		}
		.key_false {
			p.next()
			if p.convert_type {
				return Any(false)
			}
			return Any('false')
		}
		.name {
			// `null` is the only bare identifier that is accepted
			if p.tok.lit != 'null' {
				return error('unknown identifier `$p.tok.lit`')
			}
			p.next()
			if p.convert_type {
				return Any(Null{})
			}
			return Any('null')
		}
		.string {
			if p.is_singlequote() {
				return error('strings must be in double-quotes.')
			}
			return p.decode_string()
		}
		else {
			// a minus sign counts only when a number token follows it directly
			negative_number := p.tok.kind == .minus && p.n_tok.kind == .number
				&& p.n_tok.pos == p.tok.pos + 1
			if !negative_number {
				return error("unknown token '$p.tok.lit' when decoding value")
			}
			p.next()
			d_num := p.decode_number() ?
			return d_num
		}
	}
	// every branch above returns; the statements below are kept as a fallthrough
	if p.is_formfeed() {
		return error(formfeed_err)
	}
	return Any{}
}
|
|
|
|
|
|
|
|
// decode_string turns the current string token into an `Any` holding the
// unescaped text and then advances to the next token.
//
// Supported escapes are \b \f \n \r \t \\ \" \/ and \uXXXX. A \uXXXX escape
// is written as the UTF-8 encoding of its code point; a high surrogate that
// is directly followed by a low surrogate escape is combined into a single
// code point first.
//
// Errors:
// - a raw tab, line feed or NUL byte inside the literal
// - a backslash at the very end, or followed by an unsupported character
// - a \u escape with fewer than four characters after it, or with non-hex digits
fn (mut p Parser) decode_string() ?Any {
	mut strwr := strings.new_builder(200)
	// registered before the first error return so every exit releases the builder
	defer {
		unsafe { strwr.free() }
	}
	lit := p.tok.lit
	for i := 0; i < lit.len; i++ {
		ch := lit[i]
		// raw control characters are rejected wherever they appear; characters
		// consumed as part of an escape are skipped below and never reach this check
		if int(ch) in [9, 10, 0] {
			return error('character must be escaped with a backslash.')
		}
		// 92 is the backslash
		if ch != 92 {
			strwr.write_b(ch)
			continue
		}
		if i == lit.len - 1 {
			return error('invalid backslash escape.')
		}
		peek := lit[i + 1]
		match peek {
			`b` {
				strwr.write_b(`\b`)
			}
			`f` {
				strwr.write_b(`\f`)
			}
			`n` {
				strwr.write_b(`\n`)
			}
			`r` {
				strwr.write_b(`\r`)
			}
			`t` {
				strwr.write_b(`\t`)
			}
			`\\` {
				strwr.write_b(`\\`)
			}
			`"` {
				strwr.write_b(`\"`)
			}
			`/` {
				strwr.write_b(`/`)
			}
			`u` {
				if i + 5 >= lit.len {
					return error('incomplete unicode escape.')
				}
				codepoint := lit[i + 2..i + 6]
				check_valid_hex(codepoint) ?
				mut cp := u32(strconv.parse_int(codepoint, 16, 0))
				// i now sits on the last hex digit of this escape
				i += 5
				// a high surrogate followed by `\uXXXX` holding a low surrogate
				// forms one code point outside the basic multilingual plane
				if cp >= 0xD800 && cp <= 0xDBFF && i + 6 < lit.len && lit[i + 1] == 92
					&& lit[i + 2] == `u` {
					low_hex := lit[i + 3..i + 7]
					check_valid_hex(low_hex) ?
					low := u32(strconv.parse_int(low_hex, 16, 0))
					if low >= 0xDC00 && low <= 0xDFFF {
						cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00)
						i += 6
					}
				}
				// write the code point as UTF-8 instead of truncating it to one byte
				for utf8_byte in utf32_to_str(cp) {
					strwr.write_b(utf8_byte)
				}
				continue
			}
			else {
				return error('invalid backslash escape.')
			}
		}
		// skip the character that followed the backslash
		i++
	}
	p.next()
	str := strwr.str()
	return Any(str)
}
|
|
|
|
|
2020-10-09 17:11:55 +03:00
|
|
|
// decode_number converts the current number token into an `Any` and advances
// to the next token.
//
// With `convert_type` enabled the result is an `f64` when the literal has a
// fraction or an exponent, and an `i64` otherwise. With `convert_type`
// disabled the literal text is returned as a string. A minus token placed
// directly before the number is folded into the literal.
//
// Errors: `0x` literals with two or fewer digits, a leading zero followed by
// another digit, a literal starting with a dot, a literal ending in `+` or
// `-`, and an exponent marker directly after the dot.
fn (mut p Parser) decode_number() ?Any {
	src := p.scanner.text
	mut tl := p.tok.lit
	lower := tl.to_lower()
	sep_by_dot := lower.split('.')
	if tl.starts_with('0x') && tl.all_after('0x').len <= 2 {
		return error('hex numbers should not be less than or equal to two digits.')
	}
	// The check looks at the two bytes that follow the previous token. Both
	// must exist: inputs such as `0` or `-0` end right after the first digit.
	first := p.p_tok.pos + p.p_tok.len
	if first + 1 < src.len && src[first] == `0` && src[first + 1].is_digit() {
		return error('leading zeroes in integers are not allowed.')
	}
	if tl.starts_with('.') {
		return error('decimals must start with a digit followed by a dot.')
	}
	if tl.ends_with('+') || tl.ends_with('-') {
		return error('exponents must have a digit before the sign.')
	}
	// number structure: -[digit][dot][digit][E/e][-/+][digit]
	mut is_fl := false
	if sep_by_dot.len > 1 {
		is_fl = true
		last := sep_by_dot.last()
		if last.starts_with('e') {
			return error('exponents must have a digit before the exponent notation.')
		}
	} else if !lower.starts_with('0x') && lower.contains('e') {
		// an exponent without a fraction (`1e5`) is still a float; converting
		// it with i64() would silently drop the exponent
		is_fl = true
	}
	if p.p_tok.kind == .minus && p.tok.pos == p.p_tok.pos + 1 {
		tl = '-$tl'
	}
	p.next()
	if p.convert_type {
		return if is_fl {
			Any(tl.f64())
		} else {
			Any(tl.i64())
		}
	}
	return Any(tl)
}
|
|
|
|
|
|
|
|
// decode_array decodes the elements between `[` and `]` into `[]Any` and
// advances past the closing bracket.
// An error is returned when the input ends before `]`, when an element fails
// to decode, or when an element is followed by anything other than `]` or a
// comma that itself is followed by another element.
fn (mut p Parser) decode_array() ?Any {
	mut elems := []Any{}
	// step over `[`
	p.next()
	for p.tok.kind != .rsbr {
		if p.tok.kind == .eof {
			return error(eof_err)
		}
		elem := p.decode_value() ?
		elems << elem
		// a comma is accepted only when another element follows it, which
		// rules out trailing and doubled commas
		has_more := p.tok.kind == .comma && p.n_tok.kind !in [.rsbr, .comma]
		if has_more {
			p.next()
		} else if p.tok.kind != .rsbr {
			return error("unknown token '$p.tok.lit' when decoding arrays.")
		}
	}
	// step over `]`
	p.next()
	return Any(elems)
}
|
|
|
|
|
|
|
|
// decode_object decodes the members between `{` and `}` into
// `map[string]Any` and advances past the closing brace.
// Each member must be a string token followed by a colon and a value. The
// key is stored exactly as the token literal; the value is decoded with
// `decode_value`.
// An error is returned when the input ends before `}`, when a single quote
// is detected where a key is expected, when the member does not start with
// a `string :` pair, when a value fails to decode, or when a value is
// followed by anything other than `}` or a comma that precedes another member.
fn (mut p Parser) decode_object() ?Any {
	mut fields := map[string]Any{}
	mut cur_key := ''
	// step over `{`
	p.next()
	for p.tok.kind != .rcbr {
		is_key := p.tok.kind == .string && p.n_tok.kind == .colon
		// TODO: reject form feeds here as well (is_formfeed / formfeed_err)
		if p.tok.kind == .eof {
			return error(eof_err)
		}
		// this branch fires when a single quote was found, so the message
		// has to ask for double quotes
		if p.is_singlequote() {
			return error('object keys must be in double-quotes.')
		}
		if !is_key {
			return error("invalid token `$p.tok.lit`, expected \'string\'")
		}
		cur_key = p.tok.lit
		// step over the key and the colon
		p.next()
		p.next()
		fields[cur_key] = p.decode_value() ?
		// a comma is accepted only when another member follows it
		if p.tok.kind == .comma && p.n_tok.kind !in [.rcbr, .comma] {
			p.next()
			continue
		} else if p.tok.kind == .rcbr {
			break
		}
		return error("unknown token '$p.tok.lit' when decoding object.")
	}
	// step over `}`
	p.next()
	return Any(fields)
}
|