From 14424100e809853b7632c3c8fa59312d24e863a6 Mon Sep 17 00:00:00 2001 From: Larpon Date: Mon, 29 Nov 2021 21:15:22 +0100 Subject: [PATCH] toml: comply with BurntSushi @eb989e5 (#12616) --- .github/workflows/toml_ci.yml | 2 +- vlib/toml/checker/checker.v | 42 ++++++++++++++++++++++++++++++----- vlib/toml/decoder/decoder.v | 2 +- vlib/toml/scanner/scanner.v | 8 ++++++- vlib/toml/tests/crlf_test.v | 4 ++-- 5 files changed, 47 insertions(+), 11 deletions(-) diff --git a/.github/workflows/toml_ci.yml b/.github/workflows/toml_ci.yml index 2702644252..f9d0adf39c 100644 --- a/.github/workflows/toml_ci.yml +++ b/.github/workflows/toml_ci.yml @@ -14,7 +14,7 @@ jobs: timeout-minutes: 10 env: TOML_BS_TESTS_PATH: vlib/toml/tests/testdata/burntsushi/toml-test - TOML_BS_TESTS_PINNED_COMMIT: 8baf830 + TOML_BS_TESTS_PINNED_COMMIT: eb989e5 TOML_IARNA_TESTS_PATH: vlib/toml/tests/testdata/iarna/toml-test TOML_IARNA_TESTS_PINNED_COMMIT: 1880b1a steps: diff --git a/vlib/toml/checker/checker.v b/vlib/toml/checker/checker.v index 6c96688aeb..ef467e23b2 100644 --- a/vlib/toml/checker/checker.v +++ b/vlib/toml/checker/checker.v @@ -356,7 +356,23 @@ fn (c Checker) check_date(date ast.Date) ? { fn (c Checker) check_time(t ast.Time) ? { lit := t.text // Split any offsets from the time - parts := lit.split('-') + mut offset_splitter := if lit.contains('+') { '+' } else { '-' } + parts := lit.split(offset_splitter) + mut hhmmss := parts[0].all_before('.') + // Check for 2 digits in all fields + mut check_length := 8 + if hhmmss.to_upper().ends_with('Z') { + check_length++ + } + if hhmmss.len != check_length { + starts_with_zero := hhmmss.starts_with('0') + if !starts_with_zero { + return error(@MOD + '.' + @STRUCT + '.' + @FN + + ' "$lit" must be zero prefixed in ...${c.excerpt(t.pos)}...') + } + return error(@MOD + '.' + @STRUCT + '.' + @FN + + ' "$lit" is not a valid RFC 3339 Time format string in ...${c.excerpt(t.pos)}...') + } // Use V's builtin functionality to validate the time string time.parse_rfc3339(parts[0]) or { return error(@MOD + '.' + @STRUCT + '.' + @FN + @@ -397,6 +413,7 @@ fn (c Checker) check_quoted_escapes(q ast.Quoted) ? { // See https://toml.io/en/v1.0.0#string for more info on string types. is_basic := q.quote == `\"` + contains_newlines := q.text.contains('\n') for { ch := s.next() if ch == scanner.end_of_text { @@ -414,10 +431,17 @@ fn (c Checker) check_quoted_escapes(q ast.Quoted) ? { escape := ch_byte.ascii_str() + next_ch.ascii_str() if is_basic { if q.is_multiline { - if next_ch == byte(32) && s.peek(1) == byte(92) { - st := s.state() - return error(@MOD + '.' + @STRUCT + '.' + @FN + - ' can not escape whitespaces before escapes in multi-line strings (`\\ \\`) at `$escape` ($st.line_nr,$st.col) in ...${c.excerpt(q.pos)}...') + if next_ch == byte(32) { + if s.peek(1) == byte(92) { + st := s.state() + return error(@MOD + '.' + @STRUCT + '.' + @FN + + ' can not escape whitespaces before escapes in multi-line strings (`\\ \\`) at `$escape` ($st.line_nr,$st.col) in ...${c.excerpt(q.pos)}...') + } + if !contains_newlines { + st := s.state() + return error(@MOD + '.' + @STRUCT + '.' + @FN + + ' can not escape whitespaces in multi-line strings (`\\ `) at `$escape` ($st.line_nr,$st.col) in ...${c.excerpt(q.pos)}...') + } } if next_ch in [`\t`, `\n`, ` `] { s.next() @@ -518,10 +542,16 @@ pub fn (c Checker) check_comment(comment ast.Comment) ? { mut s := scanner.new_simple(lit) ? for { ch := s.next() - if ch == -1 { + if ch == scanner.end_of_text { break } ch_byte := byte(ch) + // Check for carrige return + if ch_byte == 0x0D { + st := s.state() + return error(@MOD + '.' + @STRUCT + '.' + @FN + + ' carrige return character `$ch_byte.hex()` is not allowed ($st.line_nr,$st.col) "${byte(s.at()).ascii_str()}" near ...${s.excerpt(st.pos, 10)}...') + } // Check for control characters (allow TAB) if util.is_illegal_ascii_control_character(ch_byte) { st := s.state() diff --git a/vlib/toml/decoder/decoder.v b/vlib/toml/decoder/decoder.v index 042163c612..cd5fe942da 100644 --- a/vlib/toml/decoder/decoder.v +++ b/vlib/toml/decoder/decoder.v @@ -252,7 +252,7 @@ fn (d Decoder) decode_date_time(mut dt ast.DateTime) ? { if ms.len > 1 { return } - ms = ms + '0'.repeat(6 - ms.len) + z + ms = ms + '0'.repeat(4 - ms.len) + z dt.text = yymmddhhmmss + '.' + ms + offset } } diff --git a/vlib/toml/scanner/scanner.v b/vlib/toml/scanner/scanner.v index 09d78a0961..924c46445e 100644 --- a/vlib/toml/scanner/scanner.v +++ b/vlib/toml/scanner/scanner.v @@ -93,6 +93,12 @@ pub fn (mut s Scanner) scan() ?token.Token { ascii := byte_c.ascii_str() util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'current char "$ascii"') + if byte_c == byte(0x0) { + s.reset() + return error(@MOD + '.' + @STRUCT + '.' + @FN + + ' NULL control character `$c.hex()` is not allowed at ($s.line_nr,$s.col) "$ascii" near ...${s.excerpt(s.pos, 5)}...') + } + is_sign := c == `+` || c == `-` // (+/-)nan & (+/-)inf @@ -340,7 +346,7 @@ fn (mut s Scanner) ignore_line() ?string { util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping "${byte(c).ascii_str()} / $c"') if s.at_crlf() { util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'letting `\\r\\n` slip through') - return s.text[start..s.pos] + return s.text[start..s.pos + 1] } } return s.text[start..s.pos] diff --git a/vlib/toml/tests/crlf_test.v b/vlib/toml/tests/crlf_test.v index f8203119b7..cb026d7cbe 100644 --- a/vlib/toml/tests/crlf_test.v +++ b/vlib/toml/tests/crlf_test.v @@ -2,8 +2,8 @@ import toml fn test_crlf() { str_value := 'test string' - mut toml_txt := 'crlf_string = "test string" -# Comment with CRLF\r\n' + mut toml_txt := 'crlf_string = "test string"\r\n +# Comment with CRLF is not allowed' toml_doc := toml.parse(toml_txt) or { panic(err) } value := toml_doc.value('crlf_string')