v/vlib/x/json2/scanner.v

// Copyright (c) 2019-2023 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module json2

import strconv

// Scanner walks over the bytes of a JSON document and keeps track of the
// current read position together with line/column bookkeeping.
struct Scanner {
mut:
	// the raw input bytes being scanned
	text []u8
	pos  int // the position of the token in scanner text
	// incremented by move_pos each time it lands on a byte listed in `newlines`
	line int
	// reset to 0 by move_pos on a newline byte, incremented as bytes are consumed
	col  int
}

// TokenKind identifies the category of a scanned token.
// The punctuation members carry explicit values equal to the ASCII code of
// the character they stand for, because `scan` builds them by casting the
// input byte directly to a TokenKind.
enum TokenKind {
	none_
	error
	str_
	float
	int_
	null
	bool_
	eof
	comma = 44 // ,
	colon = 58 // :
	lsbr = 91 // [
	rsbr = 93 // ]
	lcbr = 123 // {
	rcbr = 125 // }
}

// Token is a single lexical unit produced by the scanner.
// For `.error` tokens, `lit` holds the bytes of the error description.
pub struct Token {
	lit  []u8      // literal representation of the token
	kind TokenKind // the token number/enum; for quick comparisons
	line int       // the line in the source where the token occurred
	col  int       // the column in the source where the token occurred
}

// full_col returns the token column extended by the byte length of its literal.
pub fn (t Token) full_col() int {
	lit_len := t.lit.len
	return t.col + lit_len
}

const (
	// list of the structural (punctuation) characters of JSON; `scan` turns
	// each of them into the TokenKind that has the same numeric value.
	char_list                 = [`{`, `}`, `[`, `]`, `,`, `:`]
	// list of newlines to check when moving to a new position.
	// NOTE(review): the tab character is part of this list, so move_pos
	// handles a tab exactly like a line break (including the line counter).
	newlines                  = [`\r`, `\n`, `\t`]
	// list of raw control characters that are rejected inside a JSON string
	// unless they are written as a backslash escape.
	// double quotes and forward slashes are excluded intentionally since
	// they have their own separate checks for it in order to pass the
	// JSON test suite (https://github.com/nst/JSONTestSuite/).
	important_escapable_chars = [`\b`, `\f`, `\n`, `\r`, `\t`]
	// list of characters that may directly follow a backslash
	// (aside from `u` followed by 4 hex digits)
	valid_unicode_escapes     = [`b`, `f`, `n`, `r`, `t`, `\\`, `"`, `/`]
	// used for transforming escapes into the byte they stand for (eg. n => \n);
	// the keys are the ASCII codes of the characters in valid_unicode_escapes.
	unicode_transform_escapes = {
		98:  `\b`
		102: `\f`
		110: `\n`
		114: `\r`
		116: `\t`
		92:  `\\`
		34:  `"`
		47:  `/`
	}
	// signs that num_scan accepts right after the `e`/`E` exponent marker
	exp_signs = [u8(`-`), `+`]
)

// move proceeds to the next position with both space and newline
// checking enabled (see move_pos).
fn (mut s Scanner) move() {
	s.move_pos(true, true)
}

// move_pos_with_newlines is the same as move but only enables newline checking;
// space checking is turned off.
fn (mut s Scanner) move_pos_with_newlines() {
	s.move_pos(false, true)
}

// move_pos steps the scanner forward by one byte and then, depending on the
// flags, skips over the whitespace it landed on.
// include_newlines: when the new byte is listed in `newlines`, bump the line
// counter, reset the column and keep moving while newline bytes follow.
// include_space: when the new byte is a space, step over it and keep moving
// while spaces follow.
fn (mut s Scanner) move_pos(include_space bool, include_newlines bool) {
	s.pos++
	if s.pos >= s.text.len {
		// stepped past the end of the input: only the column is advanced
		s.col++
		return
	}
	landed := s.text[s.pos]
	if include_newlines && landed in json2.newlines {
		s.line++
		s.col = 0
		// a `\r\n` pair counts once: step onto the `\n` before skipping further
		is_crlf := landed == `\r` && s.pos + 1 < s.text.len && s.text[s.pos + 1] == `\n`
		if is_crlf {
			s.pos++
		}
		for s.pos < s.text.len && s.text[s.pos] in json2.newlines {
			s.move()
		}
		return
	}
	if include_space && landed == ` ` {
		s.pos++
		s.col++
		for s.pos < s.text.len && s.text[s.pos] == ` ` {
			s.move()
		}
	}
}

// error builds an `.error` token whose literal is the given description,
// stamped with the scanner's current line and column.
fn (s Scanner) error(description string) Token {
	message := description.bytes()
	return s.tokenize(message, .error)
}

// tokenize wraps the given literal and kind into a Token, recording the
// scanner's current line and column as the token location.
fn (s Scanner) tokenize(lit []u8, kind TokenKind) Token {
	tok := Token{
		kind: kind
		lit: lit
		line: s.line
		col: s.col
	}
	return tok
}

// text_scan scans a double-quoted JSON string and returns a `.str_` token
// holding the unescaped bytes, or an `.error` token describing what is wrong.
// The scanner is expected to be positioned on the opening double quote.
[manualfree]
fn (mut s Scanner) text_scan() Token {
	mut has_closed := false
	mut chrs := []u8{}
	for {
		s.pos++
		s.col++
		if s.pos >= s.text.len {
			break
		}
		ch := s.text[s.pos]
		// Every backslash is consumed together with the character(s) it escapes
		// in the branches below, so `ch` is never an escaped character here.
		// Inspecting the previous byte instead would mistake the second
		// backslash of a `\\` escape for an escape introducer and would miss
		// the closing quote of a string such as "a\\".
		if ch == `"` {
			has_closed = true
			break
		} else if ch in json2.important_escapable_chars {
			return s.error('character must be escaped with a backslash')
		} else if (s.pos == s.text.len - 1 && ch == `\\`) || ch == u8(0) {
			// a backslash as the very last input byte, or a raw NUL byte
			return s.error('invalid backslash escape')
		} else if s.pos + 1 < s.text.len && ch == `\\` {
			peek := s.text[s.pos + 1]
			if peek in json2.valid_unicode_escapes {
				// single-character escape: emit the byte it stands for
				chrs << json2.unicode_transform_escapes[int(peek)]
				s.pos++
				s.col++
				continue
			} else if peek == `u` {
				// `\uXXXX`: the guard ensures the `u` and four more bytes exist
				if s.pos + 5 < s.text.len {
					s.pos++
					s.col++
					mut codepoint := []u8{}
					codepoint_start := s.pos
					for s.pos < s.text.len && s.pos < codepoint_start + 4 {
						s.pos++
						s.col++
						if s.text[s.pos] == `"` {
							// string ended early; reported by the length check below
							break
						} else if !s.text[s.pos].is_hex_digit() {
							x := s.text[s.pos].ascii_str()
							return s.error('`${x}` is not a hex digit')
						}
						codepoint << s.text[s.pos]
					}
					if codepoint.len != 4 {
						return s.error('unicode escape must have 4 hex digits')
					}
					val := u32(strconv.parse_uint(codepoint.bytestr(), 16, 32) or { 0 })
					converted := utf32_to_str(val)
					converted_bytes := converted.bytes()
					chrs << converted_bytes
					unsafe {
						converted.free()
						converted_bytes.free()
						codepoint.free()
					}
					continue
				} else {
					return s.error('incomplete unicode escape')
				}
			} else if peek == `U` {
				return s.error('unicode endpoints must be in lowercase `u`')
			} else if peek == u8(229) {
				return s.error('unicode endpoint not allowed')
			} else {
				return s.error('invalid backslash escape')
			}
		}
		chrs << ch
	}
	// build the token before moving so it carries the position of the string end
	tok := s.tokenize(chrs, .str_)
	s.move()
	if !has_closed {
		return s.error('missing double quotes in string closing')
	}
	return tok
}

// num_scan scans a JSON number starting at the current position and returns
// an `.int_` or `.float` token with the literal bytes of the number, or an
// `.error` token when the number is malformed.
fn (mut s Scanner) num_scan() Token {
	// analyze json number structure
	// -[digit][?[dot][digit]][?[E/e][?-/+][digit]]
	mut is_fl := false
	mut dot_index := -1
	mut digits := []u8{}
	if s.text[s.pos] == `-` {
		digits << `-`
		// a `-` as the last input byte has no following byte to inspect
		if s.pos + 1 >= s.text.len || !s.text[s.pos + 1].is_digit() {
			return s.invalid_token()
		}
		s.move_pos_with_newlines()
	}
	if s.text[s.pos] == `0` && (s.pos + 1 < s.text.len && s.text[s.pos + 1].is_digit()) {
		return s.error('leading zeroes in a number are not allowed')
	}
	// integer part plus at most one dot and the fraction digits
	for s.pos < s.text.len && (s.text[s.pos].is_digit() || (!is_fl && s.text[s.pos] == `.`)) {
		digits << s.text[s.pos]
		if s.text[s.pos] == `.` {
			is_fl = true
			dot_index = digits.len - 1
		}
		s.move_pos_with_newlines()
	}
	// a dot must be followed by at least one digit; `dot_index` is an index
	// into `digits`, so it is compared with `digits.len` only
	if is_fl && dot_index == digits.len - 1 {
		return s.error('invalid float')
	}
	if s.pos < s.text.len && (s.text[s.pos] == `e` || s.text[s.pos] == `E`) {
		digits << s.text[s.pos]
		s.move_pos_with_newlines()
		if s.pos < s.text.len && s.text[s.pos] in json2.exp_signs {
			digits << s.text[s.pos]
			s.move_pos_with_newlines()
		}
		// the exponent needs at least one digit
		mut exp_digits_count := 0
		for s.pos < s.text.len && s.text[s.pos].is_digit() {
			digits << s.text[s.pos]
			exp_digits_count++
			s.move_pos_with_newlines()
		}
		if exp_digits_count == 0 {
			return s.error('invalid exponent')
		}
	}
	kind := if is_fl { TokenKind.float } else { TokenKind.int_ }
	return s.tokenize(digits, kind)
}

// invalid_token returns an error token naming the byte at the current
// position: printable ASCII (32..126) is shown as is, anything else escaped.
fn (s Scanner) invalid_token() Token {
	offending := s.text[s.pos]
	is_printable := offending >= 32 && offending <= 126
	shown := if is_printable { offending.ascii_str() } else { offending.str_escaped() }
	return s.error('invalid token `${shown}`')
}

// scan returns a token based on the scanner's current position.
// used to set the next token
// Leading spaces and bytes listed in `newlines` are skipped first; the end of
// the input yields an `.eof` token and unrecognised input an `.error` token.
[manualfree]
fn (mut s Scanner) scan() Token {
	// A single move() can stop on whitespace again (e.g. a run of spaces that
	// is followed by a newline), so keep skipping until a real byte or the end
	// of the input is reached. Each move() advances by at least one byte.
	for s.pos < s.text.len && (s.text[s.pos] == ` ` || s.text[s.pos] in json2.newlines) {
		s.move()
	}
	if s.pos >= s.text.len {
		return s.tokenize([]u8{}, .eof)
	} else if s.pos + 3 < s.text.len && (s.text[s.pos] == `t` || s.text[s.pos] == `n`) {
		// candidate for the 4-byte keywords `true` / `null`
		ident := s.text[s.pos..s.pos + 4].bytestr()
		if ident == 'true' || ident == 'null' {
			mut kind := TokenKind.null
			if ident == 'true' {
				kind = .bool_
			}
			unsafe { ident.free() }
			val := s.text[s.pos..s.pos + 4]
			tok := s.tokenize(val, kind)
			s.move() // n / t
			s.move() // u / r
			s.move() // l / u
			s.move() // l / e
			return tok
		}
		unsafe { ident.free() }
		return s.invalid_token()
	} else if s.pos + 4 < s.text.len && s.text[s.pos] == `f` {
		// candidate for the 5-byte keyword `false`
		ident := s.text[s.pos..s.pos + 5].bytestr()
		if ident == 'false' {
			unsafe { ident.free() }
			val := s.text[s.pos..s.pos + 5]
			tok := s.tokenize(val, .bool_)
			s.move() // f
			s.move() // a
			s.move() // l
			s.move() // s
			s.move() // e
			return tok
		}
		unsafe { ident.free() }
		return s.invalid_token()
	} else if s.text[s.pos] in json2.char_list {
		// punctuation: the TokenKind value equals the ASCII code of the byte
		chr := s.text[s.pos]
		tok := s.tokenize([]u8{}, unsafe { TokenKind(int(chr)) })
		s.move()
		return tok
	} else if s.text[s.pos] == `"` {
		return s.text_scan()
	} else if s.text[s.pos].is_digit() || s.text[s.pos] == `-` {
		return s.num_scan()
	} else {
		return s.invalid_token()
	}
}
all: 2023 copyright 2023-03-28 23:55:57 +03:00			`// Copyright (c) 2019-2023 Alexander Medvednikov. All rights reserved.`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`// Use of this source code is governed by an MIT license`
			`// that can be found in the LICENSE file.`
			`module json2`

			`import strconv`

			`struct Scanner {`
			`mut:`
all: replace []byte with []u8 2022-04-15 15:35:35 +03:00			`text []u8`
json2: small refactor (#16913) 2023-06-27 20:07:44 +03:00			`pos int // the position of the token in scanner text`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`line int`
			`col int`
			`}`

			`enum TokenKind {`
			`none_`
			`error`
			`str_`
			`float`
			`int_`
			`null`
			`bool_`
			`eof`
json2: small refactor (#16913) 2023-06-27 20:07:44 +03:00			`comma = 44 // ,`
			`colon = 58 // :`
			`lsbr = 91 // [`
			`rsbr = 93 // ]`
			`lcbr = 123 // {`
			`rcbr = 125 // }`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`}`

x/json2: fix decoder errors (#13655) * x/json2: fix decoder errors, refactoring * x/json2: add error-catching tests * x/json2: add missing docs * x/json2: fmt 2022-03-05 14:02:43 +03:00			`pub struct Token {`
json2: small refactor (#16913) 2023-06-27 20:07:44 +03:00			`lit []u8 // literal representation of the token`
			`kind TokenKind // the token number/enum; for quick comparisons`
			`line int // the line in the source where the token occured`
			`col int // the column in the source where the token occured`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`}`

x/json2: fix decoder errors (#13655) * x/json2: fix decoder errors, refactoring * x/json2: add error-catching tests * x/json2: add missing docs * x/json2: fmt 2022-03-05 14:02:43 +03:00			`// full_col returns the full column information which includes the length`
			`pub fn (t Token) full_col() int {`
			`return t.col + t.lit.len`
			`}`

x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`const (`
			`// list of characters commonly used in JSON.`
			char_list = [`{`, `}`, `[`, `]`, `,`, `:`]
			`// list of newlines to check when moving to a new position.`
x.json2: proper string encoding + minor fixes (#9026) 2021-03-01 12:22:36 +03:00			newlines = [`\r`, `\n`, `\t`]
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`// list of escapable that needs to be escaped inside a JSON string.`
			`// double quotes and forward slashes are excluded intentionally since`
			`// they have their own separate checks for it in order to pass the`
			`// JSON test suite (https://github.com/nst/JSONTestSuite/).`
x.json2: proper string encoding + minor fixes (#9026) 2021-03-01 12:22:36 +03:00			important_escapable_chars = [`\b`, `\f`, `\n`, `\r`, `\t`]
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`// list of valid unicode escapes aside from \u{4-hex digits}`
			valid_unicode_escapes = [`b`, `f`, `n`, `r`, `t`, `\\`, `"`, `/`]
			`// used for transforming escapes into valid unicode (eg. n => \n)`
vlib: remove deprecated map{} usages as well as deprecated functions (#11035) 2021-08-04 12:44:41 +03:00			`unicode_transform_escapes = {`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			98: `\b`
			102: `\f`
			110: `\n`
			114: `\r`
			116: `\t`
			92: `\\`
			34: `"`
			47: `/`
			`}`
all: byte => u8 2022-04-15 14:58:56 +03:00			exp_signs = [u8(`-`), `+`]
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`)`

			`// move_pos proceeds to the next position.`
x.json2: skip whitespace before scanning (#9508) 2021-03-30 10:40:20 +03:00			`fn (mut s Scanner) move() {`
			`s.move_pos(true, true)`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`}`

			`// move_pos_with_newlines is the same as move_pos but only enables newline checking.`
			`fn (mut s Scanner) move_pos_with_newlines() {`
x.json2: skip whitespace before scanning (#9508) 2021-03-30 10:40:20 +03:00			`s.move_pos(false, true)`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`}`

x.json2: skip whitespace before scanning (#9508) 2021-03-30 10:40:20 +03:00			`fn (mut s Scanner) move_pos(include_space bool, include_newlines bool) {`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`s.pos++`
			`if s.pos < s.text.len {`
			`if include_newlines && s.text[s.pos] in json2.newlines {`
			`s.line++`
			`s.col = 0`
			if s.text[s.pos] == `\r` && s.pos + 1 < s.text.len && s.text[s.pos + 1] == `\n` {
			`s.pos++`
			`}`
			`for s.pos < s.text.len && s.text[s.pos] in json2.newlines {`
x.json2: skip whitespace before scanning (#9508) 2021-03-30 10:40:20 +03:00			`s.move()`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`}`
			} else if include_space && s.text[s.pos] == ` ` {
			`s.pos++`
			`s.col++`
			for s.pos < s.text.len && s.text[s.pos] == ` ` {
x.json2: skip whitespace before scanning (#9508) 2021-03-30 10:40:20 +03:00			`s.move()`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`}`
			`}`
			`} else {`
			`s.col++`
			`}`
			`}`

			`// error returns an error token.`
			`fn (s Scanner) error(description string) Token {`
			`return s.tokenize(description.bytes(), .error)`
			`}`

			`// tokenize returns a token based on the given lit and kind.`
all: replace []byte with []u8 2022-04-15 15:35:35 +03:00			`fn (s Scanner) tokenize(lit []u8, kind TokenKind) Token {`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`return Token{`
			`lit: lit`
			`kind: kind`
			`col: s.col`
			`line: s.line`
			`}`
			`}`

			`// text_scan scans and returns a string token.`
			`[manualfree]`
			`fn (mut s Scanner) text_scan() Token {`
			`mut has_closed := false`
all: replace []byte with []u8 2022-04-15 15:35:35 +03:00			`mut chrs := []u8{}`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`for {`
x.json2: skip whitespace before scanning (#9508) 2021-03-30 10:40:20 +03:00			`s.pos++`
			`s.col++`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`if s.pos >= s.text.len {`
			`break`
			`}`
			`ch := s.text[s.pos]`
			if (s.pos - 1 >= 0 && s.text[s.pos - 1] != `\\`) && ch == `"` {
			`has_closed = true`
			`break`
			} else if (s.pos - 1 >= 0 && s.text[s.pos - 1] != `\\`)
			`&& ch in json2.important_escapable_chars {`
			`return s.error('character must be escaped with a backslash')`
all: byte => u8 2022-04-15 14:58:56 +03:00			} else if (s.pos == s.text.len - 1 && ch == `\\`) \|\| ch == u8(0) {
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`return s.error('invalid backslash escape')`
			} else if s.pos + 1 < s.text.len && ch == `\\` {
			`peek := s.text[s.pos + 1]`
			`if peek in json2.valid_unicode_escapes {`
			`chrs << json2.unicode_transform_escapes[int(peek)]`
x.json2: skip whitespace before scanning (#9508) 2021-03-30 10:40:20 +03:00			`s.pos++`
			`s.col++`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`continue`
			} else if peek == `u` {
			`if s.pos + 5 < s.text.len {`
x.json2: skip whitespace before scanning (#9508) 2021-03-30 10:40:20 +03:00			`s.pos++`
			`s.col++`
all: replace []byte with []u8 2022-04-15 15:35:35 +03:00			`mut codepoint := []u8{}`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`codepoint_start := s.pos`
			`for s.pos < s.text.len && s.pos < codepoint_start + 4 {`
x.json2: skip whitespace before scanning (#9508) 2021-03-30 10:40:20 +03:00			`s.pos++`
			`s.col++`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			if s.text[s.pos] == `"` {
			`break`
			`} else if !s.text[s.pos].is_hex_digit() {`
tests: add vlib/x/json2 to vtest-cleancode.v 2021-04-11 11:34:08 +03:00			`x := s.text[s.pos].ascii_str()`
vfmt: change all '$expr' to '${expr}' (#16428) 2022-11-15 16:53:13 +03:00			return s.error('`${x}` is not a hex digit')
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`}`
			`codepoint << s.text[s.pos]`
			`}`
			`if codepoint.len != 4 {`
			`return s.error('unicode escape must have 4 hex digits')`
			`}`
strconv: fix `atoi` returning 0 on large strings (#10635) 2021-07-02 10:39:57 +03:00			`val := u32(strconv.parse_uint(codepoint.bytestr(), 16, 32) or { 0 })`
x.json2: proper string encoding + minor fixes (#9026) 2021-03-01 12:22:36 +03:00			`converted := utf32_to_str(val)`
			`converted_bytes := converted.bytes()`
			`chrs << converted_bytes`
			`unsafe {`
			`converted.free()`
			`converted_bytes.free()`
			`codepoint.free()`
			`}`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`continue`
			`} else {`
			`return s.error('incomplete unicode escape')`
			`}`
			} else if peek == `U` {
			return s.error('unicode endpoints must be in lowercase `u`')
all: byte => u8 2022-04-15 14:58:56 +03:00			`} else if peek == u8(229) {`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`return s.error('unicode endpoint not allowed')`
			`} else {`
			`return s.error('invalid backslash escape')`
			`}`
			`}`
			`chrs << ch`
			`}`
			`tok := s.tokenize(chrs, .str_)`
x.json2: skip whitespace before scanning (#9508) 2021-03-30 10:40:20 +03:00			`s.move()`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`if !has_closed {`
			`return s.error('missing double quotes in string closing')`
			`}`
			`return tok`
			`}`

			`// num_scan scans and returns an int/float token.`
			`fn (mut s Scanner) num_scan() Token {`
			`// analyze json number structure`
			`// -[digit][?[dot][digit]][?[E/e][?-/+][digit]]`
			`mut is_fl := false`
			`mut dot_index := -1`
all: replace []byte with []u8 2022-04-15 15:35:35 +03:00			`mut digits := []u8{}`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			if s.text[s.pos] == `-` {
			digits << `-`
			`if !s.text[s.pos + 1].is_digit() {`
			`return s.invalid_token()`
			`}`
			`s.move_pos_with_newlines()`
			`}`
			if s.text[s.pos] == `0` && (s.pos + 1 < s.text.len && s.text[s.pos + 1].is_digit()) {
			`return s.error('leading zeroes in a number are not allowed')`
			`}`
			for s.pos < s.text.len && (s.text[s.pos].is_digit() \|\| (!is_fl && s.text[s.pos] == `.`)) {
			`digits << s.text[s.pos]`
			if s.text[s.pos] == `.` {
			`is_fl = true`
			`dot_index = digits.len - 1`
			`}`
			`s.move_pos_with_newlines()`
			`}`
			`if dot_index + 1 < s.text.len && digits[dot_index + 1..].len == 0 {`
			`return s.error('invalid float')`
			`}`
			if s.pos < s.text.len && (s.text[s.pos] == `e` \|\| s.text[s.pos] == `E`) {
			`digits << s.text[s.pos]`
			`s.move_pos_with_newlines()`
			`if s.pos < s.text.len && s.text[s.pos] in json2.exp_signs {`
			`digits << s.text[s.pos]`
			`s.move_pos_with_newlines()`
			`}`
			`mut exp_digits_count := 0`
			`for s.pos < s.text.len && s.text[s.pos].is_digit() {`
			`digits << s.text[s.pos]`
			`exp_digits_count++`
			`s.move_pos_with_newlines()`
			`}`
			`if exp_digits_count == 0 {`
			`return s.error('invalid exponent')`
			`}`
			`}`
			`kind := if is_fl { TokenKind.float } else { TokenKind.int_ }`
			`return s.tokenize(digits, kind)`
			`}`

			`// invalid_token returns an error token with the invalid token message.`
			`fn (s Scanner) invalid_token() Token {`
x.json2: skip whitespace before scanning (#9508) 2021-03-30 10:40:20 +03:00			`if s.text[s.pos] >= 32 && s.text[s.pos] <= 126 {`
tests: add vlib/x/json2 to vtest-cleancode.v 2021-04-11 11:34:08 +03:00			`x := s.text[s.pos].ascii_str()`
vfmt: change all '$expr' to '${expr}' (#16428) 2022-11-15 16:53:13 +03:00			return s.error('invalid token `${x}`')
x.json2: skip whitespace before scanning (#9508) 2021-03-30 10:40:20 +03:00			`} else {`
tests: add vlib/x/json2 to vtest-cleancode.v 2021-04-11 11:34:08 +03:00			`x := s.text[s.pos].str_escaped()`
vfmt: change all '$expr' to '${expr}' (#16428) 2022-11-15 16:53:13 +03:00			return s.error('invalid token `${x}`')
x.json2: skip whitespace before scanning (#9508) 2021-03-30 10:40:20 +03:00			`}`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`}`

			`// scan returns a token based on the scanner's current position.`
json2: small refactor (#16913) 2023-06-27 20:07:44 +03:00			`// used to set the next token`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`[manualfree]`
			`fn (mut s Scanner) scan() Token {`
x.json2: skip whitespace before scanning (#9508) 2021-03-30 10:40:20 +03:00			if s.pos < s.text.len && (s.text[s.pos] == ` ` \|\| s.text[s.pos] in json2.newlines) {
			`s.move()`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`}`
			`if s.pos >= s.text.len {`
all: replace []byte with []u8 2022-04-15 15:35:35 +03:00			`return s.tokenize([]u8{}, .eof)`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			} else if s.pos + 3 < s.text.len && (s.text[s.pos] == `t` \|\| s.text[s.pos] == `n`) {
			`ident := s.text[s.pos..s.pos + 4].bytestr()`
			`if ident == 'true' \|\| ident == 'null' {`
			`mut kind := TokenKind.null`
			`if ident == 'true' {`
			`kind = .bool_`
			`}`
			`unsafe { ident.free() }`
			`val := s.text[s.pos..s.pos + 4]`
			`tok := s.tokenize(val, kind)`
x.json2: skip whitespace before scanning (#9508) 2021-03-30 10:40:20 +03:00			`s.move() // n / t`
			`s.move() // u / r`
			`s.move() // l / u`
			`s.move() // l / e`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`return tok`
			`}`
			`unsafe { ident.free() }`
			`return s.invalid_token()`
			} else if s.pos + 4 < s.text.len && s.text[s.pos] == `f` {
			`ident := s.text[s.pos..s.pos + 5].bytestr()`
			`if ident == 'false' {`
			`unsafe { ident.free() }`
			`val := s.text[s.pos..s.pos + 5]`
			`tok := s.tokenize(val, .bool_)`
x.json2: skip whitespace before scanning (#9508) 2021-03-30 10:40:20 +03:00			`s.move() // f`
			`s.move() // a`
			`s.move() // l`
			`s.move() // s`
			`s.move() // e`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`return tok`
			`}`
			`unsafe { ident.free() }`
			`return s.invalid_token()`
			`} else if s.text[s.pos] in json2.char_list {`
			`chr := s.text[s.pos]`
checker: allow `EnumName(number)` casts only inside `unsafe{}` blocks (#15932) 2022-10-02 22:39:11 +03:00			`tok := s.tokenize([]u8{}, unsafe { TokenKind(int(chr)) })`
x.json2: skip whitespace before scanning (#9508) 2021-03-30 10:40:20 +03:00			`s.move()`
x.json2: create custom scanner for scanning JSON (#8716) 2021-02-26 09:36:02 +03:00			`return tok`
			} else if s.text[s.pos] == `"` {
			`return s.text_scan()`
			} else if s.text[s.pos].is_digit() \|\| s.text[s.pos] == `-` {
			`return s.num_scan()`
			`} else {`
			`return s.invalid_token()`
			`}`
			`}`