2023-03-28 23:55:57 +03:00
|
|
|
// Copyright (c) 2019-2023 Alexander Medvednikov. All rights reserved.
|
2020-09-10 13:05:40 +03:00
|
|
|
// Use of this source code is governed by an MIT license
|
|
|
|
// that can be found in the LICENSE file.
|
|
|
|
module json2
|
|
|
|
|
2022-03-05 14:02:43 +03:00
|
|
|
// format_message builds the text shared by the x.json2 errors in this file:
// the `[x.json2]` prefix, then `msg`, then the position as `(line:column)`.
fn format_message(msg string, line int, column int) string {
	formatted := '[x.json2] ${msg} (${line}:${column})'
	return formatted
}
|
|
|
|
|
|
|
|
// DecodeError describes a decoding failure together with the position
// (line and column) that is reported in its formatted message.
pub struct DecodeError {
	// line reported in the formatted message
	line int
	// column reported in the formatted message
	column int
	// description placed in front of the position
	message string
}
|
|
|
|
|
|
|
|
// code returns the numeric error code of a DecodeError, which is always 3.
pub fn (err DecodeError) code() int {
	return 3
}
|
|
|
|
|
|
|
|
// msg returns the formatted message of the DecodeError: its `message`
// followed by the stored line and column.
pub fn (err DecodeError) msg() string {
	text := format_message(err.message, err.line, err.column)
	return text
}
|
|
|
|
|
|
|
|
// InvalidTokenError reports a token that is not acceptable where it was found.
pub struct InvalidTokenError {
	// embedded base error
	DecodeError
	// the offending token; msg() reports its kind and position
	token Token
	// the kind that was expected instead; msg() omits the hint when it is `.none_`
	expected TokenKind
}
|
|
|
|
|
|
|
|
// code returns the numeric error code of an InvalidTokenError, which is always 2.
pub fn (err InvalidTokenError) code() int {
	return 2
}
|
|
|
|
|
|
|
|
// msg returns the formatted message of the InvalidTokenError.
// It names the kind of the offending token, appends the expected kind
// unless that is `.none_`, and reports the position of the token.
pub fn (err InvalidTokenError) msg() string {
	mut footer_text := ''
	if err.expected != .none_ {
		footer_text = ', expecting `${err.expected}`'
	}
	description := 'invalid token `${err.token.kind}`${footer_text}'
	return format_message(description, err.token.line, err.token.full_col())
}
|
|
|
|
|
|
|
|
// UnknownTokenError reports a token that could not be handled while
// decoding a value of the given kind.
pub struct UnknownTokenError {
	// embedded base error
	DecodeError
	// the offending token; msg() reports its literal and position
	token Token
	// the kind of value that was being decoded; `.unknown` unless set
	kind ValueKind = .unknown
}
|
|
|
|
|
|
|
|
// code returns the numeric error code of an UnknownTokenError, which is always 1.
pub fn (err UnknownTokenError) code() int {
	return 1
}
|
|
|
|
|
|
|
|
// msg returns the formatted message of the UnknownTokenError.
// It quotes the literal of the offending token, names the kind of value
// that was being decoded, and reports the position of the token.
pub fn (err UnknownTokenError) msg() string {
	description := "unknown token '${err.token.lit}' when decoding ${err.kind}."
	return format_message(description, err.token.line, err.token.full_col())
}
|
|
|
|
|
2020-09-10 13:05:40 +03:00
|
|
|
// Parser turns the tokens produced by its scanner into `Any` values.
// It keeps a three-token window (previous, current, lookahead).
struct Parser {
pub mut:
	// token source; `next()` calls `scanner.scan()` on it
	scanner &Scanner = unsafe { nil }
	// token that was current before the last call to `next()`
	prev_tok Token
	// current token; the decode functions dispatch on its kind
	tok Token
	// lookahead token, already scanned but not yet current
	next_tok Token
	// number of arrays/objects currently being decoded (nesting depth)
	n_level int
	// when true, numbers, booleans and null become typed values;
	// when false, their literal text is returned as a string
	convert_type bool = true
}
|
|
|
|
|
|
|
|
// next advances the token window by one: the current token becomes the
// previous one, the lookahead becomes current, and a fresh token is scanned
// into the lookahead slot. The order of the assignments matters.
fn (mut p Parser) next() {
	p.prev_tok = p.tok
	p.tok = p.next_tok
	p.next_tok = p.scanner.scan()
}
|
|
|
|
|
2022-10-16 09:28:57 +03:00
|
|
|
// next_with_err advances to the next token like `next()`, but fails with a
// DecodeError when the token that just became current is an error token.
// The error carries the token's position and uses its literal as the message.
fn (mut p Parser) next_with_err() ! {
	p.next()
	if p.tok.kind != .error {
		return
	}
	return DecodeError{
		line: p.tok.line
		column: p.tok.full_col()
		message: p.tok.lit.bytestr()
	}
}
|
|
|
|
|
2021-02-26 09:36:02 +03:00
|
|
|
// TODO: copied from v.util to avoid the entire module and its functions
// from being imported. remove later once -skip-unused is enabled by default.
// skip_bom - skip the Byte Order Mark (BOM).
// The UTF-8 BOM is a sequence of bytes at the start of a text stream (EF BB BF or \ufeff)
// that allows the reader to reliably determine if the file is encoded in UTF-8.
// Returns `file_content` without its first three bytes when they are the BOM,
// and `file_content` unchanged otherwise (including inputs shorter than 3 bytes).
fn skip_bom(file_content string) string {
	has_bom := file_content.len >= 3 && file_content[0] == 0xEF && file_content[1] == 0xBB
		&& file_content[2] == 0xBF
	if !has_bom {
		return file_content
	}
	// Slice by the string's own length instead of re-measuring it as a C string:
	// a NUL byte inside the content must not cut off the rest of the input.
	return file_content[3..]
}
|
|
|
|
|
2023-06-27 20:07:44 +03:00
|
|
|
// new_parser - create an instance of Parser{} for `srce`.
// A leading UTF-8 BOM is skipped before the text is handed to the scanner.
// `convert_type` is stored on the parser unchanged.
fn new_parser(srce string, convert_type bool) Parser {
	text := skip_bom(srce).bytes()
	return Parser{
		scanner: &Scanner{
			text: text
		}
		convert_type: convert_type
	}
}
|
2020-09-10 13:05:40 +03:00
|
|
|
|
2023-06-27 20:07:44 +03:00
|
|
|
// decode - decodes the provided JSON into an `Any` value.
// It fails when the scanner reports an error token, when a value cannot be
// decoded, or when anything other than end of input follows the top-level value.
pub fn (mut p Parser) decode() !Any {
	// prime the token window: the first call fills the lookahead,
	// the second one makes the first token current
	p.next()
	p.next_with_err()!
	decoded := p.decode_value()!
	if p.tok.kind == .eof {
		return decoded
	}
	return InvalidTokenError{
		token: p.tok
	}
}
|
|
|
|
|
2022-10-16 09:28:57 +03:00
|
|
|
// decode_value decodes the value that starts at the current token.
// Arrays and objects are delegated to decode_array / decode_object.
// For scalars the token is consumed first and then converted: with
// `convert_type` enabled, numbers become f64/i64, booleans become bool and
// null becomes `null`; otherwise the literal text is returned as a string.
// Strings are always returned as strings. Any other token yields an
// InvalidTokenError, and a DecodeError is returned at the nesting limit.
fn (mut p Parser) decode_value() !Any {
	if p.n_level + 1 == 500 {
		return DecodeError{
			message: 'reached maximum nesting level of 500'
		}
	}
	match p.tok.kind {
		// `[`
		.lsbr {
			return p.decode_array()
		}
		// `{`
		.lcbr {
			return p.decode_object()
		}
		.int_, .float {
			number_kind := p.tok.kind
			number_text := p.tok.lit.bytestr()
			p.next_with_err()!
			if !p.convert_type {
				return Any(number_text)
			}
			// float support can be compiled out with the `nofloat` flag
			$if !nofloat ? {
				if number_kind == .float {
					return Any(number_text.f64())
				}
			}
			return Any(number_text.i64())
		}
		.bool_ {
			bool_text := p.tok.lit.bytestr()
			p.next_with_err()!
			if !p.convert_type {
				return Any(bool_text)
			}
			return Any(bool_text.bool())
		}
		.null {
			p.next_with_err()!
			if !p.convert_type {
				return Any('null')
			}
			return Any(null)
		}
		.str_ {
			text := p.tok.lit.bytestr()
			p.next_with_err()!
			return Any(text)
		}
		else {
			return InvalidTokenError{
				token: p.tok
			}
		}
	}
	// every match branch returns; kept as the function's final return
	return Any(null)
}
|
|
|
|
|
2021-11-28 19:31:41 +03:00
|
|
|
// decode_array decodes an array; the current token must be the opening `[`.
// Elements are decoded with decode_value and must be separated by commas.
// A comma directly followed by `]` yields an InvalidTokenError, and any token
// other than `,` or `]` after an element yields an UnknownTokenError.
// The nesting level is raised while the array is decoded and lowered again
// once the closing `]` has been consumed.
[manualfree]
fn (mut p Parser) decode_array() !Any {
	mut elements := []Any{}
	p.next_with_err()!
	p.n_level++
	// loop until `]`
	for p.tok.kind != .rsbr {
		element := p.decode_value()!
		elements << element
		match p.tok.kind {
			.comma {
				p.next_with_err()!
				// a comma must be followed by another element
				if p.tok.kind == .rsbr {
					return InvalidTokenError{
						token: p.tok
					}
				}
			}
			.rsbr {
				// end of the array, the loop condition stops the iteration
			}
			else {
				return UnknownTokenError{
					token: p.tok
					kind: .array
				}
			}
		}
	}
	// consume `]`
	p.next_with_err()!
	p.n_level--
	return Any(elements)
}
|
|
|
|
|
2022-10-16 09:28:57 +03:00
|
|
|
// decode_object decodes an object; the current token must be the opening `{`.
// Every member is a string key, a colon and a value decoded with decode_value;
// members are separated by commas. A later member with the same key overwrites
// the earlier one.
// Errors:
// - InvalidTokenError when a key is not a string, when the colon is missing,
//   or when a comma is directly followed by `}` (trailing comma),
// - UnknownTokenError when a member is followed by anything but `,` or `}`,
// - any error returned while scanning or while decoding a member value.
// The nesting level is raised while the object is decoded and lowered again
// once the closing `}` has been consumed.
fn (mut p Parser) decode_object() !Any {
	mut fields := map[string]Any{}
	p.next_with_err()!
	p.n_level++
	// loop until `}`
	for p.tok.kind != .rcbr {
		// step 1 -> key
		if p.tok.kind != .str_ {
			return InvalidTokenError{
				token: p.tok
				expected: .str_
			}
		}

		cur_key := p.tok.lit.bytestr()
		p.next_with_err()!
		// step 2 -> colon separator
		if p.tok.kind != .colon {
			return InvalidTokenError{
				token: p.tok
				expected: .colon
			}
		}

		p.next_with_err()!
		// step 3 -> value
		fields[cur_key] = p.decode_value()!
		if p.tok.kind != .comma && p.tok.kind != .rcbr {
			return UnknownTokenError{
				token: p.tok
				kind: .object
			}
		} else if p.tok.kind == .comma {
			p.next_with_err()!
			// A comma must be followed by another member: `{"a":1,}` is not
			// valid JSON, and decode_array rejects `[1,]` in the same way.
			if p.tok.kind == .rcbr {
				return InvalidTokenError{
					token: p.tok
					expected: .str_
				}
			}
		}
	}
	// step 4 -> consume the closing `}`
	p.next_with_err()!
	p.n_level--
	return Any(fields)
}
|