1
0
mirror of https://github.com/vlang/v.git synced 2023-08-10 21:13:21 +03:00
v/vlib/strconv/atoi.v

254 lines
7.1 KiB
V
Raw Normal View History

module strconv
2020-12-21 10:35:24 +03:00
2023-03-28 23:55:57 +03:00
// Copyright (c) 2019-2023 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
// TODO: use options, or some way to return default with error.
2019-12-20 00:29:37 +03:00
const (
	// int_size is the width, in bits, used for the result whenever a caller
	// passes a bit size of 0 to the parsing functions of this module.
	int_size = 32
	// max_u64 is the largest value a u64 can hold (2^64 - 1).
	max_u64  = u64(0xFFFF_FFFF_FFFF_FFFF)
)
// byte_to_lower returns `c` with bit 5 (0x20) set.
// For the ASCII letters `A`..`Z` this yields `a`..`z`. No check is made that
// `c` is a letter, so other bytes that have bit 5 clear are changed as well.
[inline]
pub fn byte_to_lower(c u8) u8 {
	return c | 0x20
}
// common_parse_uint parses `s` with common_parse_uint2 and turns its numeric
// status code into an error.
// The error is only raised when at least one of `error_on_non_digit` or
// `error_on_high_digit` is true; otherwise the value returned by
// common_parse_uint2 is handed back unchanged, whatever the status code was.
// NOTE: the two flags are not told apart, either one enables every error.
pub fn common_parse_uint(s string, _base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) !u64 {
	value, code := common_parse_uint2(s, _base, _bit_size)
	report_errors := error_on_non_digit || error_on_high_digit
	if code == 0 || !report_errors {
		return value
	}
	// Negative codes are specific failures; every other non-zero code is
	// reported as a syntax error.
	msg := match code {
		-1 { 'common_parse_uint: wrong base ${_base} for ${s}' }
		-2 { 'common_parse_uint: wrong bit size ${_bit_size} for ${s}' }
		-3 { 'common_parse_uint: integer overflow ${s}' }
		else { 'common_parse_uint: syntax error ${s}' }
	}
	return error(msg)
}
// common_parse_uint2 parses `s` as an unsigned integer and reports problems
// through a numeric code instead of an error value.
//
// `_base` is the numeric base: 0, or 2 to 36. With base 0 the base is taken
// from the prefix of `s` (case-insensitive): `0b` -> 2, `0o` -> 8, `0x` -> 16,
// anything else -> 10. A prefix is only recognised when at least one more
// character follows it, and it may be followed by a single `_`.
// `_bit_size` is the width in bits the value has to fit in (up to 64);
// 0 selects `int_size`.
// A single `_` is skipped as a separator as long as it is neither the first
// nor the last character of the digits and is not next to another `_`.
//
// The first returned value is the parsed value, the second is the status code:
//    0 = OK
//   >0 = syntax error: index + 1 of the first character that is not a valid
//        digit for the base (the value is what was parsed up to that point),
//        or 1 for an empty string, a misplaced `_` or a prefix without digits
//   -1 = wrong base
//   -2 = wrong bit size
//   -3 = overflow (the value is the maximum for the bit size)
[direct_array_access]
pub fn common_parse_uint2(s string, _base int, _bit_size int) (u64, int) {
	if s.len < 1 {
		return u64(0), 1
	}
	mut bit_size := _bit_size
	mut base := _base
	mut start_index := 0
	if base == 0 {
		// Look for octal, binary and hex prefix.
		base = 10
		if s[0] == `0` {
			if s.len >= 3 {
				// `| 32` folds the prefix letter to lower case. s[1] is only read
				// here, where the length is known, so a lone "0" never causes an
				// access past the end of the string.
				ch := s[1] | 32
				if ch == `b` {
					base = 2
					start_index += 2
				} else if ch == `o` {
					base = 8
					start_index += 2
				} else if ch == `x` {
					base = 16
					start_index += 2
				}
				// allow one underscore right after the base prefix
				if s[start_index] == `_` {
					start_index++
				}
				// a prefix followed only by `_` (e.g. "0x_") contains no digits
				if start_index >= s.len {
					return u64(0), 1
				}
			} else if s.len >= 2 && (s[1] >= `0` && s[1] <= `9`) {
				// exactly "0" plus one decimal digit: skip the leading zero
				base = 10
				start_index++
			} else {
				// a lone "0", or "0" followed by a non-digit: the zero is
				// consumed and whatever follows is parsed as octal
				// TODO: Check if this behaviour is logically right
				base = 8
				start_index++
			}
		}
	} else if base < 2 || base > 36 {
		// Bases 1, negative bases and bases above 36 have no digit alphabet
		// and would break the cutoff computation below.
		return u64(0), -1
	}
	if bit_size == 0 {
		bit_size = strconv.int_size
	} else if bit_size < 0 || bit_size > 64 {
		return u64(0), -2
	}
	// cutoff is the smallest number such that cutoff * base > max_u64.
	cutoff := strconv.max_u64 / u64(base) + u64(1)
	// 64 is special-cased because shifting a u64 by 64 bits is not usable.
	max_val := if bit_size == 64 { strconv.max_u64 } else { (u64(1) << u64(bit_size)) - u64(1) }
	basem1 := base - 1
	mut n := u64(0)
	for i in start_index .. s.len {
		mut c := s[i]
		// manage underscore inside the number
		if c == `_` {
			if i == start_index || i >= (s.len - 1) {
				// leading or trailing underscore
				return u64(0), 1
			}
			if s[i - 1] == `_` || s[i + 1] == `_` {
				// two underscores in a row
				return u64(0), 1
			}
			continue
		}
		// sub_count records whether the character was in the letter range,
		// so that the punctuation between `9` and `A` is not taken as a digit.
		mut sub_count := 0
		// get the 0-9 digit; characters below `0` wrap around to large values
		// and are rejected by the range check further down
		c -= 48 // subtract the rune `0`
		// check if we are in the superior base rune interval [A..Z]
		if c >= 17 { // (65 - 48)
			sub_count++
			c -= 7 // `A` now maps to 10
			// check if we are in the superior base rune interval [a..z]
			if c >= 42 { // (97 - 7 - 48)
				sub_count++
				c -= 32 // `a` now maps to 10
			}
		}
		// check for digit over base
		if c > basem1 || (sub_count == 0 && c > 9) {
			return n, i + 1
		}
		// check if we are in the cutoff zone
		if n >= cutoff {
			// n * base overflows
			return max_val, -3
		}
		n *= u64(base)
		n1 := n + u64(c)
		if n1 < n || n1 > max_val {
			// n + c overflows u64 or exceeds the requested bit size
			return max_val, -3
		}
		n = n1
	}
	return n, 0
}
// parse_uint parses `s` as an unsigned integer in base `_base` that has to fit
// into `_bit_size` bits. It is the unsigned counterpart of parse_int and
// forwards to common_parse_uint with both error flags enabled, so every
// non-zero status of the underlying parser is returned as an error.
pub fn parse_uint(s string, _base int, _bit_size int) !u64 {
	value := common_parse_uint(s, _base, _bit_size, true, true)!
	return value
}
// common_parse_int parses `_s` as a signed integer: an optional leading `+` or
// `-` followed by digits that are handed to common_parse_uint together with
// `base`, the bit size and the two error flags.
// An empty `_s` yields 0 without an error. A `_bit_size` of 0 selects
// `int_size`. A magnitude that does not fit the signed range of the bit size
// is clamped to the nearest limit instead of being reported as an error.
[direct_array_access]
pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) !i64 {
	if _s.len < 1 {
		return i64(0)
	}
	bit_size := if _bit_size == 0 { strconv.int_size } else { _bit_size }
	// Pick off the leading sign.
	first := _s[0]
	neg := first == `-`
	mut digits := _s
	if neg || first == `+` {
		// Same as `_s[1..]`, expressed as a view on the same bytes.
		unsafe {
			digits = tos(_s.str + 1, _s.len - 1)
		}
	}
	// Convert the unsigned magnitude, then check the signed range.
	magnitude := common_parse_uint(digits, base, bit_size, error_on_non_digit, error_on_high_digit)!
	if magnitude == 0 {
		return i64(0)
	}
	// limit is 2^(bit_size - 1): the magnitude of the smallest negative value
	// and one more than the largest positive value.
	// TODO: check should u64(bit_size-1) be size of int (32)?
	limit := u64(1) << u64(bit_size - 1)
	if neg {
		if magnitude > limit {
			return -i64(limit)
		}
		return -i64(magnitude)
	}
	if magnitude >= limit {
		return i64(limit - u64(1))
	}
	return i64(magnitude)
}
2019-12-20 00:29:37 +03:00
// parse_int interprets the string `_s` as a signed integer in the given `base`
// (0, or 2 to 36) and bit size (0 to 64) and returns the corresponding value.
//
// With base 0 the base is taken from the prefix that follows the optional
// sign: 2 for "0b", 8 for "0o", 16 for "0x", and 10 otherwise. Leading zeros
// in front of decimal digits are read as decimal, not octal.
// Single underscores between digits are skipped as separators.
//
// `_bit_size` names the integer width the result must fit into; 0 stands for
// `int_size`. A bit size below 0 or above 64 is returned as an error, as are
// invalid digits and values too large for the unsigned range of the bit size.
// A value that only exceeds the signed range is clamped to the nearest limit,
// and an empty string yields 0; neither case is reported as an error.
pub fn parse_int(_s string, base int, _bit_size int) !i64 {
	parsed := common_parse_int(_s, base, _bit_size, true, true)!
	return parsed
}
// atoi converts the decimal string `s` to an int.
// An empty string is an error. Strings short enough to always fit an int are
// converted by a local digit loop that accepts an optional sign followed by
// the digits `0`..`9` only; everything else is delegated to
// parse_int(s, 10, 0) and the result is converted to int.
[direct_array_access]
pub fn atoi(s string) !int {
	if s == '' {
		return error('strconv.atoi: parsing "": invalid syntax')
	}
	short_for_32 := strconv.int_size == 32 && s.len < 10
	short_for_64 := strconv.int_size == 64 && s.len < 19
	if !(short_for_32 || short_for_64) {
		// Slow path for invalid, big, or underscored integers.
		wide := parse_int(s, 10, 0)!
		return int(wide)
	}
	// Fast path for small integers that fit int type.
	negative := s[0] == `-`
	has_sign := negative || s[0] == `+`
	if has_sign && s.len - 1 < 1 {
		// a sign with nothing after it
		return error('strconv.atoi: parsing "${s}": invalid syntax')
	}
	first_digit := if has_sign { 1 } else { 0 }
	mut acc := 0
	for pos := first_digit; pos < s.len; pos++ {
		// bytes below `0` wrap around to values above 9 and are rejected too
		digit := s[pos] - `0`
		if digit > 9 {
			return error('strconv.atoi: parsing "${s}": invalid syntax')
		}
		acc = acc * 10 + int(digit)
	}
	return if negative { -acc } else { acc }
}