From 4189b7e28075c77fde71a1bf3887999f8d484620 Mon Sep 17 00:00:00 2001 From: jeffmikels Date: Thu, 13 Jan 2022 15:26:17 -0500 Subject: [PATCH] builtin: add a `byterune` method on `[]byte` (#13145) --- vlib/builtin/int.v | 9 ++++ vlib/builtin/utf8.v | 49 +++++++++++++------ vlib/v/checker/tests/fn_type_mismatch.out | 2 +- .../checker/tests/int_modulo_by_zero_err.out | 10 ++-- .../v/checker/tests/modify_const_with_ref.out | 4 +- .../tests/module_multiple_names_err.out | 2 +- 6 files changed, 53 insertions(+), 23 deletions(-) diff --git a/vlib/builtin/int.v b/vlib/builtin/int.v index a04153d5ca..8782b68ec1 100644 --- a/vlib/builtin/int.v +++ b/vlib/builtin/int.v @@ -546,6 +546,15 @@ pub fn (b []byte) bytestr() string { } } +// byterune attempts to decode a sequence of bytes +// from utf8 to utf32 and return the result as a rune +// it will produce an error if there are more than +// four bytes in the array. +pub fn (b []byte) byterune() ?rune { + r := b.utf8_to_utf32() ? + return rune(r) +} + // repeat returns a new string with `count` number of copies of the byte it was called on. pub fn (b byte) repeat(count int) string { if count < 0 { diff --git a/vlib/builtin/utf8.v b/vlib/builtin/utf8.v index 1ca083989a..9d18043a1a 100644 --- a/vlib/builtin/utf8.v +++ b/vlib/builtin/utf8.v @@ -67,27 +67,48 @@ pub fn utf32_decode_to_buffer(code u32, buf &byte) int { } // Convert utf8 to utf32 +// the original implementation did not check for +// valid utf8 in the string, and could result in +// values greater than the utf32 spec +// it has been replaced by `utf8_to_utf32` which +// has an optional return type. +// +// this function is left for backward compatibility +// it is used in vlib/builtin/string.v, +// and also in vlib/v/gen/c/cgen.v pub fn (_rune string) utf32_code() int { - if _rune.len == 0 { + return int(_rune.bytes().utf8_to_utf32() or { + // error('more than one utf-8 rune found in this string') + rune(0) + }) +} + +// convert array of utf8 bytes to single utf32 value +// will error if more than 4 bytes are submitted +pub fn (_bytes []byte) utf8_to_utf32() ?rune { + if _bytes.len == 0 { return 0 } - // save ASC symbol as is - if _rune.len == 1 { - return int(_rune[0]) + // return ASCII unchanged + if _bytes.len == 1 { + return rune(_bytes[0]) } - mut b := byte(int(_rune[0])) - // TODO should be - // res := int( rune[0] << rune.len) - b = b << _rune.len - mut res := u32(b) - mut shift := 6 - _rune.len - for i := 1; i < _rune.len; i++ { - c := u32(_rune[i]) - res = u32(res) << shift + if _bytes.len > 4 { + return error('attempted to decode too many bytes, utf-8 is limited to four bytes maximum') + } + + mut b := byte(int(_bytes[0])) + + b = b << _bytes.len + mut res := rune(b) + mut shift := 6 - _bytes.len + for i := 1; i < _bytes.len; i++ { + c := rune(_bytes[i]) + res = rune(res) << shift res |= c & 63 // 0x3f shift = 6 } - return int(res) + return res } // Calculate length to read from the first byte diff --git a/vlib/v/checker/tests/fn_type_mismatch.out b/vlib/v/checker/tests/fn_type_mismatch.out index 0a2932d956..1fe81fe20c 100644 --- a/vlib/v/checker/tests/fn_type_mismatch.out +++ b/vlib/v/checker/tests/fn_type_mismatch.out @@ -1,5 +1,5 @@ vlib/v/checker/tests/fn_type_mismatch.vv:11:15: error: invalid array element: expected `fn (int, int) f32`, not `fn (f32, f32) f32` - 9 | + 9 | 10 | fn main() { 11 | fns := [add, div] | ~~~ diff --git a/vlib/v/checker/tests/int_modulo_by_zero_err.out b/vlib/v/checker/tests/int_modulo_by_zero_err.out index 897304072c..8c10763870 100644 --- a/vlib/v/checker/tests/int_modulo_by_zero_err.out +++ b/vlib/v/checker/tests/int_modulo_by_zero_err.out @@ -1,5 +1,5 @@ -vlib/v/checker/tests/int_modulo_by_zero_err.vv:2:17: error: modulo by zero - 1 | fn main() { - 2 | println(3 % 0) - | ^ - 3 | } +vlib/v/checker/tests/int_modulo_by_zero_err.vv:2:17: error: modulo by zero + 1 | fn main() { + 2 | println(3 % 0) + | ^ + 3 | } diff --git a/vlib/v/checker/tests/modify_const_with_ref.out b/vlib/v/checker/tests/modify_const_with_ref.out index c54a42da02..dc2b4335bc 100644 --- a/vlib/v/checker/tests/modify_const_with_ref.out +++ b/vlib/v/checker/tests/modify_const_with_ref.out @@ -6,9 +6,9 @@ vlib/v/checker/tests/modify_const_with_ref.vv:11:11: error: `constant` is immuta 12 | c.value = 200 13 | } vlib/v/checker/tests/modify_const_with_ref.vv:9:6: error: unused variable: `unused_var` - 7 | + 7 | 8 | fn main() { 9 | mut unused_var := Foo{} | ~~~~~~~~~~ 10 | unused_var = Foo{} - 11 | mut c := &constant + 11 | mut c := &constant \ No newline at end of file diff --git a/vlib/v/parser/tests/module_multiple_names_err.out b/vlib/v/parser/tests/module_multiple_names_err.out index 6ae53a8732..2f2124972d 100644 --- a/vlib/v/parser/tests/module_multiple_names_err.out +++ b/vlib/v/parser/tests/module_multiple_names_err.out @@ -2,4 +2,4 @@ vlib/v/parser/tests/module_multiple_names_err.vv:1:13: error: `module main`, you 1 | module main os | ~~ 2 | fn main() { - 3 | println('hello, world') + 3 | println('hello, world') \ No newline at end of file