diff --git a/vlib/v/scanner/scanner.v b/vlib/v/scanner/scanner.v index f265ba8bee..2df86b783d 100644 --- a/vlib/v/scanner/scanner.v +++ b/vlib/v/scanner/scanner.v @@ -1101,6 +1101,7 @@ fn (mut s Scanner) ident_string() string { start++ } s.is_inside_string = false + mut u_to_x_pos := []int{} // pos list to replace \u0020 -> \x20 slash := `\\` for { s.pos++ @@ -1146,13 +1147,17 @@ fn (mut s Scanner) ident_string() string { s.error(r'`\x` used with no following hex digits') } // Escape `\u` - if c == `u` && (s.text[s.pos + 1] == s.quote - || s.text[s.pos + 2] == s.quote || s.text[s.pos + 3] == s.quote - || s.text[s.pos + 4] == s.quote || !s.text[s.pos + 1].is_hex_digit() - || !s.text[s.pos + 2].is_hex_digit() - || !s.text[s.pos + 3].is_hex_digit() - || !s.text[s.pos + 4].is_hex_digit()) { - s.error(r'`\u` incomplete unicode character value') + if c == `u` { + if s.text[s.pos + 1] == s.quote || s.text[s.pos + 2] == s.quote + || s.text[s.pos + 3] == s.quote || s.text[s.pos + 4] == s.quote + || !s.text[s.pos + 1].is_hex_digit() || !s.text[s.pos + 2].is_hex_digit() + || !s.text[s.pos + 3].is_hex_digit() || !s.text[s.pos + 4].is_hex_digit() { + s.error(r'`\u` incomplete unicode character value') + } else if s.text[s.pos + 1] == `0` && s.text[s.pos + 2] == `0` + && (`0` <= s.text[s.pos + 3] && s.text[s.pos + 3] < `8`) { + // ascii + u_to_x_pos << s.pos - 1 + } } } // ${var} (ignore in vfmt mode) (skip \$) @@ -1179,6 +1184,15 @@ fn (mut s Scanner) ident_string() string { } if start <= s.pos { mut string_so_far := s.text[start..end] + if u_to_x_pos.len > 0 { + mut ss := []string{cap: u_to_x_pos.len + 1} + ss << string_so_far[..u_to_x_pos[0] - start] + for i in 0 .. u_to_x_pos.len - 1 { + ss << r'\x' + string_so_far[u_to_x_pos[i] - start + 4..u_to_x_pos[i + 1] - start] + } + ss << r'\x' + string_so_far[u_to_x_pos.last() + 4 - start..] + string_so_far = ss.join('') + } if n_cr_chars > 0 { string_so_far = string_so_far.replace('\r', '') } diff --git a/vlib/v/tests/strings_unicode_test.v b/vlib/v/tests/strings_unicode_test.v new file mode 100644 index 0000000000..28a98e2dfa --- /dev/null +++ b/vlib/v/tests/strings_unicode_test.v @@ -0,0 +1,10 @@ +fn test_raw_string() { + assert r'\n\u00c0' == '\\n\\u00c0' +} + +fn test_escape() { + assert '\x20' == ' ' + assert '\u0020' == ' ' + // assert '\u00c4' == 'Ä' + assert '\r\n'.bytes() == [byte(0x0d), 0x0a] +}