mirror of
https://github.com/vlang/v.git
synced 2023-08-10 21:13:21 +03:00
builtin,strconv: speedup str.int()
conversions (without -prod)
This commit is contained in:
parent
a462610376
commit
fc8e3d0971
vlib
builtin
strconv
strings
v/tests
@ -1818,6 +1818,7 @@ pub fn (s []string) join_lines() string {
|
|||||||
|
|
||||||
// reverse returns a reversed string.
|
// reverse returns a reversed string.
|
||||||
// Example: assert 'Hello V'.reverse() == 'V olleH'
|
// Example: assert 'Hello V'.reverse() == 'V olleH'
|
||||||
|
[direct_array_access]
|
||||||
pub fn (s string) reverse() string {
|
pub fn (s string) reverse() string {
|
||||||
if s.len == 0 || s.len == 1 {
|
if s.len == 0 || s.len == 1 {
|
||||||
return s.clone()
|
return s.clone()
|
||||||
@ -1870,6 +1871,7 @@ pub fn (s string) bytes() []u8 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// repeat returns a new string with `count` number of copies of the string it was called on.
|
// repeat returns a new string with `count` number of copies of the string it was called on.
|
||||||
|
[direct_array_access]
|
||||||
pub fn (s string) repeat(count int) string {
|
pub fn (s string) repeat(count int) string {
|
||||||
if count < 0 {
|
if count < 0 {
|
||||||
panic('string.repeat: count is negative: $count')
|
panic('string.repeat: count is negative: $count')
|
||||||
|
@ -101,7 +101,7 @@ fn sub96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32, u32, u32) {
|
|||||||
|
|
||||||
// Utility functions
|
// Utility functions
|
||||||
fn is_digit(x u8) bool {
|
fn is_digit(x u8) bool {
|
||||||
return (x >= strconv.c_zero && x <= strconv.c_nine) == true
|
return x >= strconv.c_zero && x <= strconv.c_nine
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_space(x u8) bool {
|
fn is_space(x u8) bool {
|
||||||
@ -109,7 +109,7 @@ fn is_space(x u8) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn is_exp(x u8) bool {
|
fn is_exp(x u8) bool {
|
||||||
return (x == `E` || x == `e`) == true
|
return x == `E` || x == `e`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Possible parser return values.
|
// Possible parser return values.
|
||||||
@ -124,6 +124,7 @@ enum ParserState {
|
|||||||
|
|
||||||
// parser tries to parse the given string into a number
|
// parser tries to parse the given string into a number
|
||||||
// NOTE: #TOFIX need one char after the last char of the number
|
// NOTE: #TOFIX need one char after the last char of the number
|
||||||
|
[direct_array_access]
|
||||||
fn parser(s string) (ParserState, PrepNumber) {
|
fn parser(s string) (ParserState, PrepNumber) {
|
||||||
mut digx := 0
|
mut digx := 0
|
||||||
mut result := ParserState.ok
|
mut result := ParserState.ok
|
||||||
|
@ -16,6 +16,7 @@ Know limitation:
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
// atof_quick return a f64 number from a string in a quick way
|
// atof_quick return a f64 number from a string in a quick way
|
||||||
|
[direct_array_access]
|
||||||
pub fn atof_quick(s string) f64 {
|
pub fn atof_quick(s string) f64 {
|
||||||
mut f := Float64u{} // result
|
mut f := Float64u{} // result
|
||||||
mut sign := f64(1.0) // result sign
|
mut sign := f64(1.0) // result sign
|
||||||
|
@ -12,8 +12,9 @@ const (
|
|||||||
max_u64 = u64(18446744073709551615) // as u64 // use this until we add support
|
max_u64 = u64(18446744073709551615) // as u64 // use this until we add support
|
||||||
)
|
)
|
||||||
|
|
||||||
|
[inline]
|
||||||
pub fn byte_to_lower(c u8) u8 {
|
pub fn byte_to_lower(c u8) u8 {
|
||||||
return c | (`x` - `X`)
|
return c | 32
|
||||||
}
|
}
|
||||||
|
|
||||||
// common_parse_uint is called by parse_uint and allows the parsing
|
// common_parse_uint is called by parse_uint and allows the parsing
|
||||||
@ -34,14 +35,14 @@ pub fn common_parse_uint(s string, _base int, _bit_size int, error_on_non_digit
|
|||||||
|
|
||||||
// the first returned value contains the parsed value,
|
// the first returned value contains the parsed value,
|
||||||
// the second returned value contains the error code (0 = OK, >1 = index of first non-parseable character + 1, -1 = wrong base, -2 = wrong bit size, -3 = overflow)
|
// the second returned value contains the error code (0 = OK, >1 = index of first non-parseable character + 1, -1 = wrong base, -2 = wrong bit size, -3 = overflow)
|
||||||
|
[direct_array_access]
|
||||||
pub fn common_parse_uint2(s string, _base int, _bit_size int) (u64, int) {
|
pub fn common_parse_uint2(s string, _base int, _bit_size int) (u64, int) {
|
||||||
mut bit_size := _bit_size
|
|
||||||
mut base := _base
|
|
||||||
if s.len < 1 || !underscore_ok(s) {
|
if s.len < 1 || !underscore_ok(s) {
|
||||||
// return error('parse_uint: syntax error $s')
|
// return error('parse_uint: syntax error $s')
|
||||||
return u64(0), 1
|
return u64(0), 1
|
||||||
}
|
}
|
||||||
base0 := base == 0
|
mut bit_size := _bit_size
|
||||||
|
mut base := _base
|
||||||
mut start_index := 0
|
mut start_index := 0
|
||||||
if 2 <= base && base <= 36 {
|
if 2 <= base && base <= 36 {
|
||||||
// valid base; nothing to do
|
// valid base; nothing to do
|
||||||
@ -49,13 +50,13 @@ pub fn common_parse_uint2(s string, _base int, _bit_size int) (u64, int) {
|
|||||||
// Look for octal, hex prefix.
|
// Look for octal, hex prefix.
|
||||||
base = 10
|
base = 10
|
||||||
if s[0] == `0` {
|
if s[0] == `0` {
|
||||||
if s.len >= 3 && byte_to_lower(s[1]) == `b` {
|
if s.len >= 3 && s[1] | 32 == `b` {
|
||||||
base = 2
|
base = 2
|
||||||
start_index += 2
|
start_index += 2
|
||||||
} else if s.len >= 3 && byte_to_lower(s[1]) == `o` {
|
} else if s.len >= 3 && s[1] | 32 == `o` {
|
||||||
base = 8
|
base = 8
|
||||||
start_index += 2
|
start_index += 2
|
||||||
} else if s.len >= 3 && byte_to_lower(s[1]) == `x` {
|
} else if s.len >= 3 && s[1] | 32 == `x` {
|
||||||
base = 16
|
base = 16
|
||||||
start_index += 2
|
start_index += 2
|
||||||
}
|
}
|
||||||
@ -85,10 +86,10 @@ pub fn common_parse_uint2(s string, _base int, _bit_size int) (u64, int) {
|
|||||||
mut n := u64(0)
|
mut n := u64(0)
|
||||||
for i in start_index .. s.len {
|
for i in start_index .. s.len {
|
||||||
c := s[i]
|
c := s[i]
|
||||||
cl := byte_to_lower(c)
|
cl := c | 32
|
||||||
|
|
||||||
mut d := u8(0)
|
mut d := u8(0)
|
||||||
if c == `_` && base0 {
|
if c == `_` && _base == 0 {
|
||||||
// underscore_ok already called
|
// underscore_ok already called
|
||||||
continue
|
continue
|
||||||
} else if `0` <= c && c <= `9` {
|
} else if `0` <= c && c <= `9` {
|
||||||
@ -125,13 +126,17 @@ pub fn parse_uint(s string, _base int, _bit_size int) ?u64 {
|
|||||||
|
|
||||||
// common_parse_int is called by parse int and allows the parsing
|
// common_parse_int is called by parse int and allows the parsing
|
||||||
// to stop on non or invalid digit characters and return with an error
|
// to stop on non or invalid digit characters and return with an error
|
||||||
|
[direct_array_access]
|
||||||
pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) ?i64 {
|
pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) ?i64 {
|
||||||
mut s := _s
|
if _s.len < 1 {
|
||||||
mut bit_size := _bit_size
|
|
||||||
if s.len < 1 {
|
|
||||||
// return error('parse_int: syntax error $s')
|
// return error('parse_int: syntax error $s')
|
||||||
return i64(0)
|
return i64(0)
|
||||||
}
|
}
|
||||||
|
mut bit_size := _bit_size
|
||||||
|
if bit_size == 0 {
|
||||||
|
bit_size = strconv.int_size
|
||||||
|
}
|
||||||
|
mut s := _s
|
||||||
// Pick off leading sign.
|
// Pick off leading sign.
|
||||||
mut neg := false
|
mut neg := false
|
||||||
if s[0] == `+` {
|
if s[0] == `+` {
|
||||||
@ -148,9 +153,6 @@ pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit b
|
|||||||
if un == 0 {
|
if un == 0 {
|
||||||
return i64(0)
|
return i64(0)
|
||||||
}
|
}
|
||||||
if bit_size == 0 {
|
|
||||||
bit_size = strconv.int_size
|
|
||||||
}
|
|
||||||
// TODO: check should u64(bit_size-1) be size of int (32)?
|
// TODO: check should u64(bit_size-1) be size of int (32)?
|
||||||
cutoff := u64(1) << u64(bit_size - 1)
|
cutoff := u64(1) << u64(bit_size - 1)
|
||||||
if !neg && un >= cutoff {
|
if !neg && un >= cutoff {
|
||||||
@ -181,9 +183,10 @@ pub fn parse_int(_s string, base int, _bit_size int) ?i64 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// atoi is equivalent to parse_int(s, 10, 0), converted to type int.
|
// atoi is equivalent to parse_int(s, 10, 0), converted to type int.
|
||||||
|
[direct_array_access]
|
||||||
pub fn atoi(s string) ?int {
|
pub fn atoi(s string) ?int {
|
||||||
if s == '' {
|
if s == '' {
|
||||||
return error('strconv.atoi: parsing "$s": invalid syntax ')
|
return error('strconv.atoi: parsing "": invalid syntax')
|
||||||
}
|
}
|
||||||
if (strconv.int_size == 32 && (0 < s.len && s.len < 10))
|
if (strconv.int_size == 32 && (0 < s.len && s.len < 10))
|
||||||
|| (strconv.int_size == 64 && (0 < s.len && s.len < 19)) {
|
|| (strconv.int_size == 64 && (0 < s.len && s.len < 19)) {
|
||||||
@ -193,7 +196,7 @@ pub fn atoi(s string) ?int {
|
|||||||
start_idx++
|
start_idx++
|
||||||
if s.len - start_idx < 1 {
|
if s.len - start_idx < 1 {
|
||||||
// return 0, &NumError{fnAtoi, s0, ErrSyntax}
|
// return 0, &NumError{fnAtoi, s0, ErrSyntax}
|
||||||
return error('strconv.atoi: parsing "$s": invalid syntax ')
|
return error('strconv.atoi: parsing "$s": invalid syntax')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
mut n := 0
|
mut n := 0
|
||||||
@ -201,7 +204,7 @@ pub fn atoi(s string) ?int {
|
|||||||
ch := s[i] - `0`
|
ch := s[i] - `0`
|
||||||
if ch > 9 {
|
if ch > 9 {
|
||||||
// return 0, &NumError{fnAtoi, s0, ErrSyntax}
|
// return 0, &NumError{fnAtoi, s0, ErrSyntax}
|
||||||
return error('strconv.atoi: parsing "$s": invalid syntax ')
|
return error('strconv.atoi: parsing "$s": invalid syntax')
|
||||||
}
|
}
|
||||||
n = n * 10 + int(ch)
|
n = n * 10 + int(ch)
|
||||||
}
|
}
|
||||||
@ -215,6 +218,7 @@ pub fn atoi(s string) ?int {
|
|||||||
// underscore_ok reports whether the underscores in s are allowed.
|
// underscore_ok reports whether the underscores in s are allowed.
|
||||||
// Checking them in this one function lets all the parsers skip over them simply.
|
// Checking them in this one function lets all the parsers skip over them simply.
|
||||||
// Underscore must appear only between digits or between a base prefix and a digit.
|
// Underscore must appear only between digits or between a base prefix and a digit.
|
||||||
|
[direct_array_access]
|
||||||
fn underscore_ok(s string) bool {
|
fn underscore_ok(s string) bool {
|
||||||
// saw tracks the last character (class) we saw:
|
// saw tracks the last character (class) we saw:
|
||||||
// ^ for beginning of number,
|
// ^ for beginning of number,
|
||||||
@ -229,17 +233,16 @@ fn underscore_ok(s string) bool {
|
|||||||
}
|
}
|
||||||
// Optional base prefix.
|
// Optional base prefix.
|
||||||
mut hex := false
|
mut hex := false
|
||||||
if s.len - i >= 2 && s[i] == `0` && (byte_to_lower(s[i + 1]) == `b`
|
if (s.len - i >= 2) && (s[i] == `0`) && (((s[i + 1] | 32) == `b`)
|
||||||
|| byte_to_lower(s[i + 1]) == `o` || byte_to_lower(s[i + 1]) == `x`) {
|
|| ((s[i + 1] | 32) == `o`) || ((s[i + 1] | 32) == `x`)) {
|
||||||
saw = `0` // base prefix counts as a digit for "underscore as digit separator"
|
saw = `0` // base prefix counts as a digit for "underscore as digit separator"
|
||||||
hex = byte_to_lower(s[i + 1]) == `x`
|
hex = (s[i + 1] | 32) == `x`
|
||||||
i += 2
|
i += 2
|
||||||
}
|
}
|
||||||
// Number proper.
|
// Number proper.
|
||||||
for ; i < s.len; i++ {
|
for ; i < s.len; i++ {
|
||||||
// Digits are always okay.
|
// Digits are always okay.
|
||||||
if (`0` <= s[i] && s[i] <= `9`) || (hex && `a` <= byte_to_lower(s[i])
|
if (`0` <= s[i] && s[i] <= `9`) || ((hex && `a` <= (s[i] | 32)) && ((s[i] | 32) <= `f`)) {
|
||||||
&& byte_to_lower(s[i]) <= `f`) {
|
|
||||||
saw = `0`
|
saw = `0`
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -41,7 +41,7 @@ pub fn format_int(n i64, radix int) string {
|
|||||||
|
|
||||||
// format_uint returns the string representation of the number n in base `radix`
|
// format_uint returns the string representation of the number n in base `radix`
|
||||||
// for digit values > 10, this function uses the small latin leters a-z.
|
// for digit values > 10, this function uses the small latin leters a-z.
|
||||||
[manualfree]
|
[direct_array_access; manualfree]
|
||||||
pub fn format_uint(n u64, radix int) string {
|
pub fn format_uint(n u64, radix int) string {
|
||||||
unsafe {
|
unsafe {
|
||||||
if radix < 2 || radix > 36 {
|
if radix < 2 || radix > 36 {
|
||||||
|
@ -75,7 +75,7 @@ pub fn f64_to_str_l_no_dot(f f64) string {
|
|||||||
// floating-point `string` in scientific notation.
|
// floating-point `string` in scientific notation.
|
||||||
//
|
//
|
||||||
// Example: assert strconv.fxx_to_str_l_parse('34.22e+00') == '34.22'
|
// Example: assert strconv.fxx_to_str_l_parse('34.22e+00') == '34.22'
|
||||||
[manualfree]
|
[direct_array_access; manualfree]
|
||||||
pub fn fxx_to_str_l_parse(s string) string {
|
pub fn fxx_to_str_l_parse(s string) string {
|
||||||
// check for +inf -inf Nan
|
// check for +inf -inf Nan
|
||||||
if s.len > 2 && (s[0] == `n` || s[1] == `i`) {
|
if s.len > 2 && (s[0] == `n` || s[1] == `i`) {
|
||||||
@ -202,7 +202,7 @@ pub fn fxx_to_str_l_parse(s string) string {
|
|||||||
// The decimal digits after the dot can be omitted.
|
// The decimal digits after the dot can be omitted.
|
||||||
//
|
//
|
||||||
// Example: assert strconv.fxx_to_str_l_parse_no_dot ('34.e+01') == '340'
|
// Example: assert strconv.fxx_to_str_l_parse_no_dot ('34.e+01') == '340'
|
||||||
[manualfree]
|
[direct_array_access; manualfree]
|
||||||
pub fn fxx_to_str_l_parse_no_dot(s string) string {
|
pub fn fxx_to_str_l_parse_no_dot(s string) string {
|
||||||
// check for +inf -inf Nan
|
// check for +inf -inf Nan
|
||||||
if s.len > 2 && (s[0] == `n` || s[1] == `i`) {
|
if s.len > 2 && (s[0] == `n` || s[1] == `i`) {
|
||||||
|
@ -36,7 +36,7 @@ pub fn v_printf(str string, pt ...voidptr) {
|
|||||||
// assert strconv.v_sprintf('aaa %G', x) == 'aaa 3.141516'
|
// assert strconv.v_sprintf('aaa %G', x) == 'aaa 3.141516'
|
||||||
// ```
|
// ```
|
||||||
[deprecated: 'use string interpolation instead']
|
[deprecated: 'use string interpolation instead']
|
||||||
[manualfree]
|
[direct_array_access; manualfree]
|
||||||
pub fn v_sprintf(str string, pt ...voidptr) string {
|
pub fn v_sprintf(str string, pt ...voidptr) string {
|
||||||
mut res := strings.new_builder(pt.len * 16)
|
mut res := strings.new_builder(pt.len * 16)
|
||||||
defer {
|
defer {
|
||||||
@ -560,7 +560,7 @@ fn fabs(x f64) f64 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// strings.Builder version of format_fl
|
// strings.Builder version of format_fl
|
||||||
[manualfree]
|
[direct_array_access; manualfree]
|
||||||
pub fn format_fl_old(f f64, p BF_param) string {
|
pub fn format_fl_old(f f64, p BF_param) string {
|
||||||
unsafe {
|
unsafe {
|
||||||
mut s := ''
|
mut s := ''
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
module strings
|
module strings
|
||||||
|
|
||||||
// strings.repeat - fill a string with `n` repetitions of the character `c`
|
// strings.repeat - fill a string with `n` repetitions of the character `c`
|
||||||
|
[direct_array_access]
|
||||||
pub fn repeat(c u8, n int) string {
|
pub fn repeat(c u8, n int) string {
|
||||||
if n <= 0 {
|
if n <= 0 {
|
||||||
return ''
|
return ''
|
||||||
@ -16,6 +17,7 @@ pub fn repeat(c u8, n int) string {
|
|||||||
// strings.repeat_string - gives you `n` repetitions of the substring `s`
|
// strings.repeat_string - gives you `n` repetitions of the substring `s`
|
||||||
// Note: strings.repeat, that repeats a single byte, is between 2x
|
// Note: strings.repeat, that repeats a single byte, is between 2x
|
||||||
// and 24x faster than strings.repeat_string called for a 1 char string.
|
// and 24x faster than strings.repeat_string called for a 1 char string.
|
||||||
|
[direct_array_access]
|
||||||
pub fn repeat_string(s string, n int) string {
|
pub fn repeat_string(s string, n int) string {
|
||||||
if n <= 0 || s.len == 0 {
|
if n <= 0 || s.len == 0 {
|
||||||
return ''
|
return ''
|
||||||
|
17
vlib/v/tests/bench/bench_string_int.v
Normal file
17
vlib/v/tests/bench/bench_string_int.v
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
import benchmark
|
||||||
|
|
||||||
|
const maxn = 999_999
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
mut snumbers := []string{cap: maxn}
|
||||||
|
for i in 0 .. maxn {
|
||||||
|
snumbers << i.str()
|
||||||
|
}
|
||||||
|
mut sum := i64(0)
|
||||||
|
mut bmark := benchmark.start()
|
||||||
|
for s in snumbers {
|
||||||
|
sum += s.int()
|
||||||
|
}
|
||||||
|
bmark.measure('s.int()')
|
||||||
|
dump(sum)
|
||||||
|
}
|
@ -1,3 +1,5 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
[direct_array_access]
|
[direct_array_access]
|
||||||
fn test_big_int_array() {
|
fn test_big_int_array() {
|
||||||
dump(sizeof(isize))
|
dump(sizeof(isize))
|
||||||
@ -5,6 +7,11 @@ fn test_big_int_array() {
|
|||||||
if sizeof(isize) > 4 {
|
if sizeof(isize) > 4 {
|
||||||
maxn = 1_000_000_000 // 1 billion integers, when each is 4 bytes => require ~4GB
|
maxn = 1_000_000_000 // 1 billion integers, when each is 4 bytes => require ~4GB
|
||||||
}
|
}
|
||||||
|
// NB: this test requires RAM that many people do not have, so only run it in full, when VTEST_BIGMEM is 1
|
||||||
|
vtest_bigmem := os.getenv('VTEST_BIGMEM').int()
|
||||||
|
if vtest_bigmem == 0 {
|
||||||
|
maxn = 10_000_000
|
||||||
|
}
|
||||||
dump(maxn)
|
dump(maxn)
|
||||||
mut data := []int{len: maxn}
|
mut data := []int{len: maxn}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user