strconv module + use it in builtin/string instead of C functions

2023-08-10 21:13:21 +03:00 · 2019-10-18 03:37:55 +11:00 · 2019-10-18 03:37:55 +11:00 · 8d16762f03
commit 8d16762f03
parent 270934441c
2 changed files with 270 additions and 13 deletions
--- a/vlib/builtin/string.v
+++ b/vlib/builtin/string.v
@ -41,6 +41,8 @@ NB: A V string should be/is immutable from the point of view of
    when used with modules using C functions (for example os and so on).
 */
 import strconv
 struct string {
 //mut:
 	//hash_cache int
@ -180,12 +182,12 @@ pub fn (s string) replace(rep, with string) string {
 }
 pub fn (s string) int() int {
-	return C.atoi(*char(s.str))
+	return strconv.parse_int(s, 0, 32)
 }
 pub fn (s string) i64() i64 {
-	return C.atoll(*char(s.str))
+	return strconv.parse_int(s, 0, 64)
 }
 pub fn (s string) f32() f32 {
@ -197,20 +199,11 @@ pub fn (s string) f64() f64 {
 }
 pub fn (s string) u32() u32 {
-	//$if tinyc {
+	return strconv.parse_uint(s, 0, 32)
 		//return u32(s.int()) // TODO
 	//} $else {
 		return C.strtoul(*char(s.str), 0, 0)
 	//}
 }
 pub fn (s string) u64() u64 {
-	//$if tinyc {
+	return strconv.parse_uint(s, 0, 64)
 		//return u64(s.i64()) // TODO
 	//} $else {
 		return C.strtoull(*char(s.str), 0, 0)
 	//}
 	//return C.atoll(s.str) // temporary fix for tcc on windows.
 }
 // ==
--- a/vlib/strconv/atoi.v
+++ b/vlib/strconv/atoi.v
@ -0,0 +1,264 @@
 // Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
 // Use of this source code is governed by an MIT license
 // that can be found in the LICENSE file.
 // TODO: use optionals, or some way to return default with error.
 module strconv
 const(
    // int_size is the size in bits of an int or uint value.
    // It is hard-coded to 32 below; the computed form would be:
    // int_size = 32 << (~u32(0) >> 63)
    // In this file it is the bit size substituted when a caller passes
    // bit_size == 0, and it selects the fast-path length limit in atoi.
    //
    // max_u64 is the largest u64 value, taken from C's UINT64_MAX.
    // NOTE(review): the commented-out native form below evaluates to
    // 2^63 - 1, not 2^64 - 1 - confirm before switching to it.
    // max_u64 = u64(u64(1 << 63) - 1)
 	int_size = 32
 	max_u64  = u64(C.UINT64_MAX) // use this until we add support
 )
 // byte_to_lower ORs c with the bit that separates `X` from `x`.
 // For ASCII upper-case letters this yields the lower-case letter;
 // bytes that already have that bit set are returned unchanged.
 fn byte_to_lower(c byte) byte {
 	case_bit := `x` - `X`
 	return c | case_bit
 }
 // parse_uint is like parse_int but for unsigned numbers: no sign is accepted.
 //
 // _s is trimmed of surrounding whitespace before parsing.
 // _base must be 0 or in 2..36. With base 0 the base is taken from the
 // prefix ("0b" -> 2, "0o" or a leading "0" -> 8, "0x" -> 16, otherwise 10)
 // and underscores are accepted as digit separators.
 // _bit_size must be in 0..64; 0 means int_size.
 //
 // Until optionals are used (see the TODO at the top of the file) errors are
 // reported through the return value:
 //   - syntax, base or bit-size error: returns 0
 //   - value out of range for _bit_size: returns the largest value that
 //     fits in _bit_size bits
 pub fn parse_uint(_s string, _base int, _bit_size int) u64 {
 	mut s := _s.trim_space()
 	mut bit_size := _bit_size
 	mut base := _base
 	if s == "" || !underscore_ok(s) {
 		// return error('parse_uint: syntax error $s')
 		return u64(0)
 	}
 	base0 := base == 0
 	// Keep the untouched (trimmed) input for the final underscore check.
 	s0 := s
 	if 2 <= base && base <= 36 {
 		// valid base; nothing to do
 	} else if base == 0 {
 		// Look for binary, octal, hex prefix.
 		base = 10
 		if s[0] == `0` {
 			if s.len >= 3 && byte_to_lower(s[1]) == `b` {
 				base = 2
 				s = s.right(2)
 			} else if s.len >= 3 && byte_to_lower(s[1]) == `o` {
 				base = 8
 				s = s.right(2)
 			} else if s.len >= 3 && byte_to_lower(s[1]) == `x` {
 				base = 16
 				s = s.right(2)
 			} else {
 				// A bare leading zero means octal.
 				base = 8
 				s = s.right(1)
 			}
 		}
 	} else {
 		// return error('parse_uint: base error $s0 - $base')
 		return u64(0)
 	}
 	if bit_size == 0 {
 		bit_size = int(int_size)
 	} else if bit_size < 0 || bit_size > 64 {
 		// return error('parse_uint: bitsize error $s0 - $bit_size')
 		return u64(0)
 	}
 	// cutoff is the smallest number such that cutoff*base > max_u64.
 	cutoff := u64(max_u64/u64(base)) + u64(1)
 	// max_val is the largest value representable in bit_size bits,
 	// i.e. 2^bit_size - 1. The 64-bit case is special-cased because
 	// shifting a u64 by 64 is not meaningful.
 	max_val := if bit_size == 64 {
 		max_u64
 	} else {
 		u64((u64(1) << u64(bit_size)) - u64(1))
 	}
 	mut underscores := false
 	mut n := u64(0)
 	for _, c in s {
 		mut d := byte(0)
 		cl := byte_to_lower(c)
 		if c == `_` && base0 {
 			// underscore_ok already called
 			underscores = true
 			continue
 		} else if `0` <= c && c <= `9` {
 			d = c - `0`
 		} else if `a` <= cl && cl <= `z` {
 			d = cl - `a` + 10
 		} else {
 			// return error('parse_uint: syntax error $s0')
 			return u64(0)
 		}
 		if d >= byte(base) {
 			// Digit is not valid in this base.
 			// return error('parse_uint: syntax error $s0')
 			return u64(0)
 		}
 		if n >= cutoff {
 			// n*base overflows
 			// return error('parse_uint: range error $s0')
 			return max_val
 		}
 		n *= u64(base)
 		n1 := n + u64(d)
 		if n1 < n || n1 > u64(max_val) {
 			// n+d overflows or exceeds the requested bit size
 			// return error('parse_uint: range error $s0')
 			return max_val
 		}
 		n = n1
 	}
 	if underscores && !underscore_ok(s0) {
 		// return error('parse_uint: syntax error $s0')
 		return u64(0)
 	}
 	return n
 }
 // parse_int interprets the string _s in the given base (0, 2 to 36) and
 // bit size (0 to 64) and returns the corresponding value.
 //
 // Surrounding whitespace is ignored and a single leading `+` or `-`
 // is accepted.
 //
 // If the base argument is 0, the true base is implied by the string's
 // prefix: 2 for "0b", 8 for "0" or "0o", 16 for "0x", and 10 otherwise.
 // Also, for argument base 0 only, underscore characters are permitted
 // as defined by the Go syntax for integer literals.
 //
 // The _bit_size argument specifies the integer type that the result
 // must fit into; 0 means int_size.
 //
 // Until optionals are used (see the TODO at the top of the file) errors are
 // reported through the return value:
 //   - syntax error, invalid base or invalid bit size: returns 0
 //   - value out of range: returns the nearest limit of the signed
 //     _bit_size-bit range
 pub fn parse_int(_s string, base int, _bit_size int) i64 {
 	// Trim before looking for the sign: parse_uint trims its own input,
 	// so without this " 42" would parse while " -42" would be rejected.
 	mut s := _s.trim_space()
 	mut bit_size := _bit_size
 	if s == '' {
 		// return error('parse_int: syntax error $_s')
 		return i64(0)
 	}
 	// Pick off leading sign.
 	mut neg := false
 	if s[0] == `+` {
 		s = s.right(1)
 	} else if s[0] == `-` {
 		neg = true
 		s = s.right(1)
 	}
 	// Convert unsigned and check range.
 	// parse_uint reports every error as 0, which is also a valid result;
 	// either way the answer here is 0.
 	un := parse_uint(s, base, bit_size)
 	if un == 0 {
 		return i64(0)
 	}
 	if bit_size == 0 {
 		bit_size = int(int_size)
 	}
 	// cutoff is 2^(bit_size-1): the magnitude of the most negative value,
 	// and one more than the largest positive value.
 	cutoff := u64(u64(1) << u64(bit_size-1))
 	if !neg && un >= cutoff {
 		// return error('parse_int: range error $_s')
 		return i64(cutoff - u64(1))
 	}
 	if neg && un > cutoff {
 		// return error('parse_int: range error $_s')
 		return -i64(cutoff)
 	}
 	return if neg { -i64(un) } else { i64(un) }
 }
 // atoi converts a base-10 string to an int.
 // Short inputs are decoded by a hand-written loop; everything else is
 // delegated to parse_int(s, 10, 0) and converted to int.
 // In the short-input loop a syntax error yields 0.
 pub fn atoi(_s string) int {
 	n_chars := _s.len
 	short_enough := (int_size == 32 && (0 < n_chars && n_chars < 10)) || (int_size == 64 && (0 < n_chars && n_chars < 19))
 	if !short_enough {
 		// Slow path for empty or long inputs.
 		wide := parse_int(_s, 10, 0)
 		return int(wide)
 	}
 	// Fast path: the digit count guarantees the result fits an int.
 	negative := _s[0] == `-`
 	mut digits := _s
 	if negative || _s[0] == `+` {
 		digits = _s.right(1)
 		if digits.len < 1 {
 			// A sign with nothing after it.
 			return 0
 		}
 	}
 	mut total := 0
 	for _, raw in digits {
 		// Byte subtraction wraps, so anything below `0` also ends up > 9.
 		val := raw - `0`
 		if val > 9 {
 			return 0
 		}
 		total = total*10 + int(val)
 	}
 	if negative {
 		return -total
 	}
 	return total
 }
 // underscore_ok reports whether every underscore in _s is placed legally.
 // An underscore is legal only between two digits, or between a base
 // prefix (0b, 0o, 0x) and a digit. Doing the check here lets the parsers
 // simply skip underscores while converting.
 fn underscore_ok(_s string) bool {
 	mut body := _s
 	// Drop an optional sign.
 	if body.len >= 1 && (body[0] == `-` || body[0] == `+`) {
 		body = body.right(1)
 	}
 	// last is the class of the previous character:
 	// ^ start of number,
 	// 0 digit or base prefix,
 	// _ underscore,
 	// ! anything else.
 	mut last := `^`
 	mut pos := 0
 	mut is_hex := false
 	// Optional base prefix; it counts as a digit for separator purposes.
 	if body.len >= 2 && body[0] == `0` {
 		marker := byte_to_lower(body[1])
 		if marker == `b` || marker == `o` || marker == `x` {
 			pos = 2
 			last = `0`
 			is_hex = marker == `x`
 		}
 	}
 	// Number proper.
 	for ; pos < body.len; pos++ {
 		c := body[pos]
 		lc := byte_to_lower(c)
 		is_digit := (`0` <= c && c <= `9`) || (is_hex && `a` <= lc && lc <= `f`)
 		if is_digit {
 			last = `0`
 		} else if c == `_` {
 			// An underscore must come right after a digit.
 			if last != `0` {
 				return false
 			}
 			last = `_`
 		} else if last == `_` {
 			// An underscore must be followed by a digit.
 			return false
 		} else {
 			last = `!`
 		}
 	}
 	// A trailing underscore is not allowed.
 	return last != `_`
 }