2020-10-07 11:06:52 +03:00
module strconv
2020-12-21 10:35:24 +03:00
2023-03-28 23:55:57 +03:00
// Copyright (c) 2019-2023 Alexander Medvednikov. All rights reserved.
2019-10-17 19:37:55 +03:00
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
2023-01-09 09:36:45 +03:00
// TODO: use options, or some way to return default with error.
2019-12-20 00:29:37 +03:00
const (
	// int_size is the width, in bits, used for `int` when a caller asks for bit size 0.
	int_size = 32
	// max_u64 is the largest value a u64 can hold (2^64 - 1), spelled out as a literal.
	max_u64  = u64(0xFFFF_FFFF_FFFF_FFFF)
)
2022-09-08 11:09:13 +03:00
// byte_to_lower sets bit 5 (0x20) of `c`, which maps ASCII `A`..`Z` onto `a`..`z`.
// No range check is performed: bytes that are not upper-case letters also get
// bit 5 set (e.g. `@` becomes a backtick), so only use it on known letters or
// when comparing against a lower-case letter.
[inline]
pub fn byte_to_lower(c u8) u8 {
	return c | 0x20
}
2019-11-28 09:46:10 +03:00
// common_parse_uint parses `s` through common_parse_uint2 and converts its
// numeric status code into a V error.
// When both `error_on_non_digit` and `error_on_high_digit` are false, every
// status code is ignored and the partial result is returned as is.
// TODO: error_on_non_digit and error_on_high_digit have no difference
pub fn common_parse_uint(s string, _base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) !u64 {
	value, code := common_parse_uint2(s, _base, _bit_size)
	wants_errors := error_on_non_digit || error_on_high_digit
	if code == 0 || !wants_errors {
		return value
	}
	if code == -1 {
		return error('common_parse_uint: wrong base ${_base} for ${s}')
	}
	if code == -2 {
		return error('common_parse_uint: wrong bit size ${_bit_size} for ${s}')
	}
	if code == -3 {
		return error('common_parse_uint: integer overflow ${s}')
	}
	// any other code is the position of an unparseable character
	return error('common_parse_uint: syntax error ${s}')
}
// common_parse_uint2 parses `s` as an unsigned integer.
//
// `_base` is either 0 or a value in 2..36. With base 0 the base is taken from
// the prefix of `s`: "0b" -> 2, "0o" -> 8, "0x" -> 16 (either letter case),
// a "0" followed by a decimal digit stays decimal, a lone "0" is read as octal,
// and anything else is decimal.
// `_bit_size` is the width the result must fit in (0 means `int_size`).
// Single underscores between digits are accepted as separators.
//
// The first returned value contains the parsed value,
// the second returned value contains the status code:
//   0  = OK
//   >0 = syntax error; usually the index of the first non-parseable character + 1
//        (an empty string or a misplaced underscore yields 1)
//   -1 = wrong base
//   -2 = wrong bit size
//   -3 = overflow (the value is then the maximum for the bit size)
[direct_array_access]
pub fn common_parse_uint2(s string, _base int, _bit_size int) (u64, int) {
	if s.len < 1 {
		return u64(0), 1
	}

	mut bit_size := _bit_size
	mut base := _base
	mut start_index := 0

	// reject bases that cannot be expressed with the digits 0-9 and a-z
	if base != 0 && (base < 2 || base > 36) {
		return u64(0), -1
	}

	if base == 0 {
		// Look for octal, binary and hex prefix.
		base = 10
		if s[0] == `0` {
			if s.len >= 3 {
				// the prefix letter is only read once the length guarantees it exists
				ch := s[1] | 32
				if ch == `b` {
					base = 2
					start_index += 2
				} else if ch == `o` {
					base = 8
					start_index += 2
				} else if ch == `x` {
					base = 16
					start_index += 2
				}
				// check for underscore after the base prefix
				if s[start_index] == `_` {
					start_index++
					// a prefix followed only by an underscore has no digits at all
					if start_index >= s.len {
						return u64(0), start_index
					}
				}
			} else if s.len >= 2 && (s[1] >= `0` && s[1] <= `9`) {
				// manage leading zeros in decimal base's numbers
				base = 10
				start_index++
			} else {
				// otherwise it is an octal for C compatibility
				// TODO: Check if this behaviour is logically right
				base = 8
				start_index++
			}
		}
	}

	if bit_size == 0 {
		bit_size = strconv.int_size
	} else if bit_size < 0 || bit_size > 64 {
		return u64(0), -2
	}

	// Cutoff is the smallest number such that cutoff*base > max_u64.
	cutoff := strconv.max_u64 / u64(base) + u64(1)
	max_val := if bit_size == 64 {
		strconv.max_u64
	} else {
		(u64(1) << u64(bit_size)) - u64(1)
	}

	mut n := u64(0)
	for i in start_index .. s.len {
		c := s[i]

		// manage underscore inside the number
		if c == `_` {
			// a separator may not be the first or the last character of the digits
			if i == start_index || i >= (s.len - 1) {
				return u64(0), 1
			}
			// two separators in a row are not allowed
			if s[i - 1] == `_` || s[i + 1] == `_` {
				return u64(0), 1
			}
			continue
		}

		// Decode the character with explicit ranges, so that punctuation between
		// `9` and `A` is never taken for a digit and upper/lower case letters
		// always map to the same value, whatever the base is.
		mut digit := -1
		if c >= `0` && c <= `9` {
			digit = int(c - `0`)
		} else if c >= `a` && c <= `z` {
			digit = int(c - `a`) + 10
		} else if c >= `A` && c <= `Z` {
			digit = int(c - `A`) + 10
		}

		// not a digit, or a digit that does not exist in this base
		if digit < 0 || digit >= base {
			return n, i + 1
		}

		// check if we are in the cutoff zone
		if n >= cutoff {
			// n*base overflows
			return max_val, -3
		}
		n *= u64(base)

		n1 := n + u64(digit)
		if n1 < n || n1 > max_val {
			// n+digit overflows, or no longer fits in bit_size bits
			return max_val, -3
		}
		n = n1
	}
	return n, 0
}
2019-10-17 19:37:55 +03:00
2019-11-28 09:46:10 +03:00
// parse_uint is the unsigned counterpart of parse_int: it parses `s` in the
// given base and bit size and returns an error for any failure reported by
// common_parse_uint (both error flags are enabled).
pub fn parse_uint(s string, _base int, _bit_size int) !u64 {
	parsed := common_parse_uint(s, _base, _bit_size, true, true)!
	return parsed
}
2019-11-28 09:46:10 +03:00
// common_parse_int parses a signed integer: it strips one optional leading
// `+` or `-`, parses the remainder with common_parse_uint and applies the sign.
// An empty string yields 0 without an error. A `_bit_size` of 0 means `int_size`.
// Magnitudes that do not fit in a signed `_bit_size`-bit integer are clamped to
// the nearest representable value instead of being reported as an error.
[direct_array_access]
pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) !i64 {
	if _s.len < 1 {
		return i64(0)
	}
	bit_size := if _bit_size == 0 { strconv.int_size } else { _bit_size }

	// Pick off the leading sign, if there is one.
	neg := _s[0] == `-`
	mut s := _s
	if neg || _s[0] == `+` {
		// same as s[1..], but re-uses the existing buffer instead of copying it
		unsafe {
			s = tos(s.str + 1, s.len - 1)
		}
	}

	// Convert the unsigned magnitude, then range check it.
	un := common_parse_uint(s, base, bit_size, error_on_non_digit, error_on_high_digit)!
	if un == 0 {
		return i64(0)
	}

	// TODO: check should u64(bit_size-1) be size of int (32)?
	// cutoff is 2^(bit_size-1): the magnitude of the most negative value.
	cutoff := u64(1) << u64(bit_size - 1)
	if neg {
		if un > cutoff {
			// below the minimum: clamp to the minimum
			return -i64(cutoff)
		}
		return -i64(un)
	}
	if un >= cutoff {
		// above the maximum: clamp to the maximum
		return i64(cutoff - u64(1))
	}
	return i64(un)
}
2019-12-20 00:29:37 +03:00
2019-11-28 09:46:10 +03:00
// parse_int parses the string `_s` as a signed integer in the given `base`
// and returns it as an i64.
//
// An optional leading `+` or `-` is accepted. When `base` is 0, the prefixes
// "0b", "0o" and "0x" (either letter case) select base 2, 8 and 16; see
// common_parse_uint2 for how other leading zeros are treated.
// Single underscores between digits are accepted as separators.
//
// `_bit_size` is the width of the integer type the result is meant for;
// 0 stands for the width of `int`. A bit size below 0 or above 64 makes the
// call fail with an error.
//
// The work is delegated to common_parse_int with both error flags enabled.
pub fn parse_int(_s string, base int, _bit_size int) !i64 {
	value := common_parse_int(_s, base, _bit_size, true, true)!
	return value
}
2019-10-17 19:37:55 +03:00
// atoi converts the decimal string `s` to an int.
// Strings short enough to never overflow an int are converted by a direct
// digit loop that accepts one optional leading sign; everything else is handed
// to parse_int(s, 10, 0) and the result is narrowed to int.
// An empty string, a lone sign, or a non-digit on the short path is an error.
[direct_array_access]
pub fn atoi(s string) !int {
	if s == '' {
		return error('strconv.atoi: parsing "": invalid syntax')
	}
	// Lengths strictly below this limit can take the fast path.
	fast_limit := if strconv.int_size == 32 {
		10
	} else if strconv.int_size == 64 {
		19
	} else {
		0
	}
	if s.len > 0 && s.len < fast_limit {
		// Fast path for small integers that fit int type.
		has_sign := s[0] == `-` || s[0] == `+`
		first_digit := if has_sign { 1 } else { 0 }
		if has_sign && s.len - first_digit < 1 {
			// a sign with nothing after it
			return error('strconv.atoi: parsing "${s}": invalid syntax')
		}
		mut acc := 0
		for pos in first_digit .. s.len {
			// bytes below `0` wrap around to large values, so one test covers both sides
			d := s[pos] - `0`
			if d > 9 {
				return error('strconv.atoi: parsing "${s}": invalid syntax')
			}
			acc = acc * 10 + int(d)
		}
		if s[0] == `-` {
			return -acc
		}
		return acc
	}
	// Slow path for invalid, big, or underscored integers.
	wide := parse_int(s, 10, 0)!
	return int(wide)
}