2020-10-07 11:06:52 +03:00
module strconv
2020-12-21 10:35:24 +03:00
2023-03-28 23:55:57 +03:00
// Copyright (c) 2019-2023 Alexander Medvednikov. All rights reserved.
2019-10-17 19:37:55 +03:00
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
2023-01-09 09:36:45 +03:00
// TODO: use options, or some way to return default with error.
2019-12-20 00:29:37 +03:00
const (
	// int_size is the size in bits of an int or uint value.
	// It is fixed at 32 here; a computed form was sketched earlier as
	// `32 << (~u32(0) >> 63)` but is not used.
	int_size = 32
	// max_u64 is the largest value a u64 can hold (2^64 - 1, i.e. 18446744073709551615).
	// It is kept as a local constant until built-in support can replace it.
	max_u64  = u64(0xFFFF_FFFF_FFFF_FFFF)
)
2022-09-08 11:09:13 +03:00
// byte_to_lower sets bit 5 (0x20) of `c`, which maps the ASCII letters `A`..`Z`
// onto `a`..`z` and leaves `a`..`z` and the digits untouched.
// No range check is done: any other byte that has this bit clear is changed
// too (for example `@` becomes a backtick).
[inline]
pub fn byte_to_lower(c u8) u8 {
	lowercase_bit := u8(0x20)
	return c | lowercase_bit
}
2019-11-28 09:46:10 +03:00
// common_parse_uint parses `s` as an unsigned integer by delegating to
// common_parse_uint2 and turning its numeric status code into a V error.
//
// If common_parse_uint2 reports a non-zero status and at least one of
// `error_on_non_digit` / `error_on_high_digit` is true, an error is returned:
//   -1 -> wrong base, -2 -> wrong bit size, -3 -> integer overflow,
//   anything else -> syntax error.
// If both flags are false the status is ignored and the value produced by
// common_parse_uint2 (possibly a partial or saturated result) is returned.
pub fn common_parse_uint(s string, _base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) !u64 {
	result, err := common_parse_uint2(s, _base, _bit_size)
	// TODO: error_on_non_digit and error_on_high_digit have no difference
	report_errors := error_on_non_digit || error_on_high_digit
	if err == 0 || !report_errors {
		return result
	}
	msg := match err {
		-1 { 'common_parse_uint: wrong base ${_base} for ${s}' }
		-2 { 'common_parse_uint: wrong bit size ${_bit_size} for ${s}' }
		-3 { 'common_parse_uint: integer overflow ${s}' }
		else { 'common_parse_uint: syntax error ${s}' }
	}
	return error(msg)
}
// common_parse_uint2 parses `s` as an unsigned integer and reports problems
// through a status code instead of an error.
//
// The first returned value is the parsed (or partially parsed) value, the
// second is the status:
//    0  = OK
//   >0  = 1-based index of the first character that could not be parsed
//         (1 is also returned for an empty string or misplaced underscores)
//   -1  = wrong base (must be 0 or 2..36)
//   -2  = wrong bit size (must be 0..64; 0 selects int_size)
//   -3  = overflow; the returned value is then the maximum for the bit size
//
// With `_base == 0` the base is taken from the prefix: "0b" -> 2, "0o" -> 8,
// "0x" -> 16 (each needs at least one following character), a "0" followed by
// a decimal digit -> 10, any other leading "0" -> 8, and 10 otherwise.
// Underscores are skipped only when `_base == 0`.
[direct_array_access]
pub fn common_parse_uint2(s string, _base int, _bit_size int) (u64, int) {
	if s.len < 1 || !underscore_ok(s) {
		return u64(0), 1
	}

	// Resolve the base and the index of the first digit.
	mut base := _base
	mut first_digit := 0
	if base == 0 {
		base = 10
		if s[0] == `0` {
			// A base prefix is only honoured when something follows it.
			marker := if s.len >= 3 { s[1] | 32 } else { u8(0) }
			if marker == `b` {
				base = 2
				first_digit = 2
			} else if marker == `o` {
				base = 8
				first_digit = 2
			} else if marker == `x` {
				base = 16
				first_digit = 2
			} else {
				// Leading zeros of a decimal number stay decimal; anything else is octal.
				first_digit = 1
				zero_then_decimal := s.len >= 2 && s[1] >= `0` && s[1] <= `9`
				base = if zero_then_decimal { 10 } else { 8 }
			}
		}
	} else if base < 2 || base > 36 {
		return u64(0), -1
	}

	// Resolve the bit size.
	if _bit_size < 0 || _bit_size > 64 {
		return u64(0), -2
	}
	bit_size := if _bit_size == 0 { strconv.int_size } else { _bit_size }

	// cutoff is the smallest number such that cutoff*base > max_u64.
	radix := u64(base)
	cutoff := strconv.max_u64 / radix + u64(1)
	max_val := if bit_size == 64 {
		strconv.max_u64
	} else {
		(u64(1) << u64(bit_size)) - u64(1)
	}

	mut acc := u64(0)
	for i in first_digit .. s.len {
		c := s[i]
		if c == `_` && _base == 0 {
			// Placement was already validated by underscore_ok.
			continue
		}
		lower := c | 32
		mut digit := u8(0)
		if c >= `0` && c <= `9` {
			digit = c - `0`
		} else if lower >= `a` && lower <= `z` {
			digit = lower - `a` + 10
		} else {
			// Not a digit in any base.
			return acc, i + 1
		}
		if digit >= u8(base) {
			// Digit too large for the selected base.
			return acc, i + 1
		}
		if acc >= cutoff {
			// acc*base would overflow u64.
			return max_val, -3
		}
		acc *= radix
		next := acc + u64(digit)
		if next < acc || next > max_val {
			// acc+digit wrapped around or exceeds the requested bit size.
			return max_val, -3
		}
		acc = next
	}
	return acc, 0
}
2019-10-17 19:37:55 +03:00
2019-11-28 09:46:10 +03:00
// parse_uint is like parse_int but for unsigned numbers.
// It forwards to common_parse_uint with both error flags enabled, so a wrong
// base, a wrong bit size, an overflow or a syntax problem is returned as an error.
pub fn parse_uint(s string, _base int, _bit_size int) !u64 {
	value := common_parse_uint(s, _base, _bit_size, true, true)!
	return value
}
2019-11-28 09:46:10 +03:00
// common_parse_int is called by parse_int. It strips an optional leading
// `+` or `-`, parses the rest with common_parse_uint (passing the two error
// flags through) and applies the sign.
//
// An empty string yields 0 without an error. A `_bit_size` of 0 selects
// int_size. A magnitude that does not fit the signed range of the bit size is
// saturated to the nearest limit instead of being reported as an error.
[direct_array_access]
pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) !i64 {
	if _s.len < 1 {
		// return error('parse_int: syntax error $s')
		return i64(0)
	}
	bit_size := if _bit_size == 0 { strconv.int_size } else { _bit_size }

	// Pick off the leading sign.
	neg := _s[0] == `-`
	digits := if neg || _s[0] == `+` { _s[1..] } else { _s }

	// Convert the unsigned magnitude, then check the signed range.
	un := common_parse_uint(digits, base, bit_size, error_on_non_digit, error_on_high_digit)!
	if un == 0 {
		return i64(0)
	}
	// TODO: check should u64(bit_size-1) be size of int (32)?
	cutoff := u64(1) << u64(bit_size - 1)
	if neg {
		if un > cutoff {
			// Below the minimum: saturate.
			return -i64(cutoff)
		}
		return -i64(un)
	}
	if un >= cutoff {
		// Above the maximum: saturate.
		return i64(cutoff - u64(1))
	}
	return i64(un)
}
2019-12-20 00:29:37 +03:00
2019-11-28 09:46:10 +03:00
// parse_int interprets a string `_s` in the given base (0, 2 to 36) and
// bit size (0 to 64) and returns the corresponding value as an i64.
// An optional leading `+` or `-` sign is accepted.
//
// If the base argument is 0, the true base is implied by the string's
// prefix: 2 for "0b", 8 for "0o", 16 for "0x", and 10 otherwise; a "0"
// followed directly by a decimal digit is read as a decimal number with
// leading zeros. For base 0 only, underscore characters are permitted
// between digits, or between a base prefix and a digit.
//
// The `_bit_size` argument specifies the integer type that the result must
// fit into. Bit sizes 0, 8, 16, 32, and 64 correspond to int, i8, i16, int
// (32 bit) and i64. If `_bit_size` is below 0 or above 64, an error is returned.
pub fn parse_int(_s string, base int, _bit_size int) !i64 {
	parsed := common_parse_int(_s, base, _bit_size, true, true)!
	return parsed
}
2019-10-17 19:37:55 +03:00
// atoi parses the decimal string `s` and returns it as an int.
// For non-empty input it behaves like parse_int(s, 10, 0) converted to int;
// an empty string is rejected with an error.
//
// Strings short enough to be guaranteed to fit an int (fewer than 10
// characters for a 32 bit int_size, fewer than 19 for 64 bit) are converted
// by a fast path that accepts an optional sign followed by decimal digits
// only. Everything else is handed to parse_int.
[direct_array_access]
pub fn atoi(s string) !int {
	if s == '' {
		return error('strconv.atoi: parsing "": invalid syntax')
	}
	max_fast_len := if strconv.int_size == 32 {
		10
	} else if strconv.int_size == 64 {
		19
	} else {
		0
	}
	if s.len < max_fast_len {
		// Fast path for small integers that fit int type.
		has_sign := s[0] == `-` || s[0] == `+`
		first_digit := if has_sign { 1 } else { 0 }
		if s.len - first_digit < 1 {
			// A sign with nothing after it.
			return error('strconv.atoi: parsing "${s}": invalid syntax')
		}
		mut n := 0
		for i in first_digit .. s.len {
			c := s[i]
			if c < `0` || c > `9` {
				return error('strconv.atoi: parsing "${s}": invalid syntax')
			}
			n = n * 10 + int(c - `0`)
		}
		return if s[0] == `-` { -n } else { n }
	}
	// Slow path for invalid, big, or underscored integers.
	int64 := parse_int(s, 10, 0)!
	return int(int64)
}
// underscore_ok reports whether the underscores in `s` are placed legally:
// an underscore may only appear between digits, or between a base prefix
// ("0b", "0o", "0x", either case) and a digit. Validating this once lets the
// parsers simply skip underscores afterwards.
// An optional leading sign is ignored. Strings without underscores always pass.
[direct_array_access]
fn underscore_ok(s string) bool {
	// Skip an optional sign.
	mut pos := 0
	if s.len >= 1 && (s[0] == `-` || s[0] == `+`) {
		pos = 1
	}

	// prev is the class of the previously seen character:
	//   ^ beginning of number, 0 digit or base prefix,
	//   _ underscore,          ! anything else.
	mut prev := `^`
	mut is_hex := false

	// Optional base prefix.
	if s.len - pos >= 2 && s[pos] == `0` {
		marker := s[pos + 1] | 32
		if marker == `b` || marker == `o` || marker == `x` {
			// A base prefix counts as a digit for "underscore as digit separator".
			prev = `0`
			is_hex = marker == `x`
			pos += 2
		}
	}

	// Number proper.
	for pos < s.len {
		c := s[pos]
		pos++
		lower := c | 32
		is_digit := (c >= `0` && c <= `9`) || (is_hex && lower >= `a` && lower <= `f`)
		if is_digit {
			// Digits are always okay.
			prev = `0`
		} else if c == `_` {
			// An underscore must follow a digit.
			if prev != `0` {
				return false
			}
			prev = `_`
		} else {
			// An underscore must also be followed by a digit.
			if prev == `_` {
				return false
			}
			prev = `!`
		}
	}
	// A trailing underscore is not allowed.
	return prev != `_`
}