1
0
mirror of https://github.com/vlang/v.git synced 2023-08-10 21:13:21 +03:00
v/vlib/strconv/atof.v

560 lines
11 KiB
V
Raw Normal View History

/*
atof util
Copyright (c) 2019 Dario Deledda. All rights reserved.
Use of this source code is governed by an MIT license
that can be found in the LICENSE file.
This file contains utilities for convert a string in a f64 variable
IEEE 754 standard is used
Know limitation:
- limited to 18 significant digits
The code is inspired by:
Grzegorz Kraszewski krashan@teleinfo.pb.edu.pl
URL: http://krashan.ppa.pl/articles/stringtofloat/
Original license: MIT
*/
2019-12-17 01:07:13 +03:00
module strconv
union Float64u {
mut:
f f64
u u64
}
/*
96 bit operation utilities
Note: when u128 will be available these function can be refactored
*/
2019-12-20 00:29:37 +03:00
2019-12-17 01:07:13 +03:00
// right logical shift 96 bit
2019-12-20 00:29:37 +03:00
fn lsr96(s2 u32, s1 u32, s0 u32) (u32,u32,u32) {
2019-12-17 01:07:13 +03:00
mut r0 := u32(0)
mut r1 := u32(0)
mut r2 := u32(0)
2019-12-20 00:29:37 +03:00
r0 = (s0>>1) | ((s1 & u32(1))<<31)
r1 = (s1>>1) | ((s2 & u32(1))<<31)
r2 = s2>>1
2019-12-17 01:07:13 +03:00
return r2,r1,r0
}
// left logical shift 96 bit
2019-12-20 00:29:37 +03:00
fn lsl96(s2 u32, s1 u32, s0 u32) (u32,u32,u32) {
2019-12-17 01:07:13 +03:00
mut r0 := u32(0)
mut r1 := u32(0)
mut r2 := u32(0)
2019-12-20 00:29:37 +03:00
r2 = (s2<<1) | ((s1 & (u32(1)<<31))>>31)
r1 = (s1<<1) | ((s0 & (u32(1)<<31))>>31)
r0 = s0<<1
2019-12-17 01:07:13 +03:00
return r2,r1,r0
}
// sum on 96 bit
2019-12-20 00:29:37 +03:00
fn add96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32,u32,u32) {
mut w := u64(0)
2019-12-17 01:07:13 +03:00
mut r0 := u32(0)
mut r1 := u32(0)
mut r2 := u32(0)
w = u64(s0) + u64(d0)
r0 = u32(w)
w >>= 32
w += u64(s1) + u64(d1)
r1 = u32(w)
w >>= 32
w += u64(s2) + u64(d2)
r2 = u32(w)
return r2,r1,r0
}
// subtraction on 96 bit
2019-12-20 00:29:37 +03:00
fn sub96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32,u32,u32) {
mut w := u64(0)
2019-12-17 01:07:13 +03:00
mut r0 := u32(0)
mut r1 := u32(0)
mut r2 := u32(0)
w = u64(s0) - u64(d0)
r0 = u32(w)
w >>= 32
w += u64(s1) - u64(d1)
r1 = u32(w)
w >>= 32
w += u64(s2) - u64(d2)
r2 = u32(w)
return r2,r1,r0
}
/*
Constants
*/
2019-12-20 00:29:37 +03:00
2019-12-17 01:07:13 +03:00
const (
2019-12-20 00:29:37 +03:00
//
// f64 constants
//
2020-05-22 18:36:09 +03:00
digits = 18
double_plus_zero = u64(0x0000000000000000)
double_minus_zero = u64(0x8000000000000000)
double_plus_infinity = u64(0x7FF0000000000000)
double_minus_infinity = u64(0xFFF0000000000000)
2019-12-17 01:07:13 +03:00
//
// parser state machine states
//
fsm_a = 0
fsm_b = 1
fsm_c = 2
fsm_d = 3
fsm_e = 4
fsm_f = 5
fsm_g = 6
fsm_h = 7
fsm_i = 8
2020-05-22 18:36:09 +03:00
fsm_stop = 9
2019-12-17 01:07:13 +03:00
//
// Possible parser return values.
//
parser_ok = 0 // parser finished OK
parser_pzero = 1 // no digits or number is smaller than +-2^-1022
parser_mzero = 2 // number is negative, module smaller
parser_pinf = 3 // number is higher than +HUGE_VAL
parser_minf = 4 // number is lower than -HUGE_VAL
2019-12-17 01:07:13 +03:00
//
// char constants
// Note: Modify these if working with non-ASCII encoding
//
2020-05-22 18:36:09 +03:00
c_dpoint = `.`
c_plus = `+`
c_minus = `-`
c_zero = `0`
c_nine = `9`
c_ten = u32(10)
2019-12-17 01:07:13 +03:00
)
/*
Utility
*/
2019-12-17 01:07:13 +03:00
2019-12-20 00:29:37 +03:00
// NOTE: Modify these if working with non-ASCII encoding
2019-12-17 01:07:13 +03:00
fn is_digit(x byte) bool {
2020-05-22 18:36:09 +03:00
return (x >= c_zero && x <= c_nine) == true
2019-12-17 01:07:13 +03:00
}
fn is_space(x byte) bool {
2020-06-29 09:23:51 +03:00
return (x == `\t` || x == `\n` || x == `\v` || x == `\f` || x == `\r` || x == ` `)
2019-12-17 01:07:13 +03:00
}
fn is_exp(x byte) bool {
return (x == `E` || x == `e`) == true
}
/*
Support struct
*/
2019-12-20 00:29:37 +03:00
2019-12-17 01:07:13 +03:00
// The structure is filled by parser, then given to converter.
2019-12-20 00:29:37 +03:00
pub struct PrepNumber {
2019-12-17 01:07:13 +03:00
pub mut:
negative bool // 0 if positive number, 1 if negative
exponent int // power of 10 exponent
mantissa u64 // integer mantissa
2019-12-17 01:07:13 +03:00
}
/*
String parser
NOTE: #TOFIX need one char after the last char of the number
*/
2019-12-17 01:07:13 +03:00
2019-12-20 00:29:37 +03:00
// parser return a support struct with all the parsing information for the converter
fn parser(s string) (int,PrepNumber) {
mut state := fsm_a
2019-12-20 00:29:37 +03:00
mut digx := 0
mut c := ` ` // initial value for kicking off the state machine
mut result := parser_ok
2019-12-20 00:29:37 +03:00
mut expneg := false
mut expexp := 0
mut i := 0
mut pn := PrepNumber{
}
2020-05-22 18:36:09 +03:00
for state != fsm_stop {
2019-12-20 00:29:37 +03:00
match state {
2019-12-17 01:07:13 +03:00
// skip starting spaces
fsm_a {
2019-12-20 00:29:37 +03:00
if is_space(c) == true {
2019-12-17 01:07:13 +03:00
c = s[i++]
2019-12-20 00:29:37 +03:00
}
else {
state = fsm_b
2019-12-17 01:07:13 +03:00
}
}
// check for the sign or point
fsm_b {
state = fsm_c
2020-05-22 18:36:09 +03:00
if c == c_plus {
2019-12-17 01:07:13 +03:00
c = s[i++]
//i++
2019-12-20 00:29:37 +03:00
}
2020-05-22 18:36:09 +03:00
else if c == c_minus {
2019-12-17 01:07:13 +03:00
pn.negative = true
c = s[i++]
2019-12-20 00:29:37 +03:00
}
else if is_digit(c) {
}
2020-05-22 18:36:09 +03:00
else if c == c_dpoint {
2019-12-20 00:29:37 +03:00
}
2019-12-17 01:07:13 +03:00
else {
2020-05-22 18:36:09 +03:00
state = fsm_stop
2019-12-17 01:07:13 +03:00
}
}
// skip the inital zeros
fsm_c {
2020-05-22 18:36:09 +03:00
if c == c_zero {
2019-12-20 00:29:37 +03:00
c = s[i++]
}
2020-05-22 18:36:09 +03:00
else if c == c_dpoint {
2019-12-17 01:07:13 +03:00
c = s[i++]
state = fsm_d
2019-12-20 00:29:37 +03:00
}
else {
state = fsm_e
2019-12-17 01:07:13 +03:00
}
}
// reading leading zeros in the fractional part of mantissa
fsm_d {
2020-05-22 18:36:09 +03:00
if c == c_zero {
2019-12-17 01:07:13 +03:00
c = s[i++]
2019-12-20 00:29:37 +03:00
if pn.exponent > -2147483647 {
pn.exponent--
}
2019-12-17 01:07:13 +03:00
}
else {
state = fsm_f
2019-12-17 01:07:13 +03:00
}
}
// reading integer part of mantissa
fsm_e {
2019-12-17 01:07:13 +03:00
if is_digit(c) {
2020-05-22 18:36:09 +03:00
if digx < digits {
2019-12-17 01:07:13 +03:00
pn.mantissa *= 10
2020-05-22 18:36:09 +03:00
pn.mantissa += u64(c - c_zero)
2019-12-17 01:07:13 +03:00
digx++
}
2019-12-20 00:29:37 +03:00
else if pn.exponent < 2147483647 {
pn.exponent++
}
2019-12-17 01:07:13 +03:00
c = s[i++]
}
2020-05-22 18:36:09 +03:00
else if c == c_dpoint {
2019-12-17 01:07:13 +03:00
c = s[i++]
state = fsm_f
2019-12-17 01:07:13 +03:00
}
else {
state = fsm_f
2019-12-17 01:07:13 +03:00
}
}
// reading fractional part of mantissa
fsm_f {
2019-12-17 01:07:13 +03:00
if is_digit(c) {
2020-05-22 18:36:09 +03:00
if digx < digits {
2019-12-17 01:07:13 +03:00
pn.mantissa *= 10
2020-05-22 18:36:09 +03:00
pn.mantissa += u64(c - c_zero)
2019-12-17 01:07:13 +03:00
pn.exponent--
digx++
}
c = s[i++]
}
else if is_exp(c) {
c = s[i++]
state = fsm_g
2019-12-17 01:07:13 +03:00
}
else {
state = fsm_g
2019-12-17 01:07:13 +03:00
}
}
// reading sign of exponent
fsm_g {
2020-05-22 18:36:09 +03:00
if c == c_plus {
2019-12-17 01:07:13 +03:00
c = s[i++]
2019-12-20 00:29:37 +03:00
}
2020-05-22 18:36:09 +03:00
else if c == c_minus {
2019-12-17 01:07:13 +03:00
expneg = true
c = s[i++]
}
state = fsm_h
2019-12-17 01:07:13 +03:00
}
// skipping leading zeros of exponent
fsm_h {
2020-05-22 18:36:09 +03:00
if c == c_zero {
2019-12-17 01:07:13 +03:00
c = s[i++]
}
else {
state = fsm_i
2019-12-17 01:07:13 +03:00
}
}
// reading exponent digits
fsm_i {
2019-12-17 01:07:13 +03:00
if is_digit(c) {
if expexp < 214748364 {
expexp *= 10
2020-05-22 18:36:09 +03:00
expexp += int(c - c_zero)
2019-12-17 01:07:13 +03:00
}
c = s[i++]
}
else {
2020-05-22 18:36:09 +03:00
state = fsm_stop
2019-12-17 01:07:13 +03:00
}
}
2019-12-20 00:29:37 +03:00
else {
}}
// C.printf("len: %d i: %d str: %s \n",s.len,i,s[..i])
if i >= s.len {
2020-05-22 18:36:09 +03:00
state = fsm_stop
2019-12-17 01:07:13 +03:00
}
}
if expneg {
expexp = -expexp
}
pn.exponent += expexp
if pn.mantissa == 0 {
if pn.negative {
result = parser_mzero
2019-12-20 00:29:37 +03:00
}
else {
result = parser_pzero
2019-12-17 01:07:13 +03:00
}
}
else if pn.exponent > 309 {
2019-12-17 01:07:13 +03:00
if pn.negative {
result = parser_minf
2019-12-20 00:29:37 +03:00
}
else {
result = parser_pinf
2019-12-17 01:07:13 +03:00
}
}
else if pn.exponent < -328 {
if pn.negative {
result = parser_mzero
2019-12-20 00:29:37 +03:00
}
else {
result = parser_pzero
2019-12-17 01:07:13 +03:00
}
}
return result,pn
}
/*
Converter to the bit form of the f64 number
*/
2019-12-20 00:29:37 +03:00
2019-12-17 01:07:13 +03:00
// converter return a u64 with the bit image of the f64 number
2020-06-04 11:35:40 +03:00
fn converter(mut pn PrepNumber) u64 {
2019-12-17 01:07:13 +03:00
mut binexp := 92
2019-12-20 00:29:37 +03:00
mut s2 := u32(0) // 96-bit precision integer
mut s1 := u32(0)
mut s0 := u32(0)
mut q2 := u32(0) // 96-bit precision integer
mut q1 := u32(0)
mut q0 := u32(0)
mut r2 := u32(0) // 96-bit precision integer
mut r1 := u32(0)
mut r0 := u32(0)
mask28 := u32(0xF<<28)
2019-12-17 01:07:13 +03:00
mut result := u64(0)
// working on 3 u32 to have 96 bit precision
s0 = u32(pn.mantissa & u64(0x00000000FFFFFFFF))
2019-12-20 00:29:37 +03:00
s1 = u32(pn.mantissa>>32)
2019-12-17 01:07:13 +03:00
s2 = u32(0)
// so we take the decimal exponent off
for pn.exponent > 0 {
2019-12-20 00:29:37 +03:00
q2,q1,q0 = lsl96(s2, s1, s0) // q = s * 2
r2,r1,r0 = lsl96(q2, q1, q0) // r = s * 4 <=> q * 2
s2,s1,s0 = lsl96(r2, r1, r0) // s = s * 8 <=> r * 2
s2,s1,s0 = add96(s2, s1, s0, q2, q1, q0) // s = (s * 8) + (s * 2) <=> s*10
2019-12-17 01:07:13 +03:00
pn.exponent--
for (s2 & mask28) != 0 {
2019-12-20 00:29:37 +03:00
q2,q1,q0 = lsr96(s2, s1, s0)
2019-12-17 01:07:13 +03:00
binexp++
s2 = q2
s1 = q1
s0 = q0
}
}
for pn.exponent < 0 {
2019-12-20 00:29:37 +03:00
for !((s2 & (u32(1)<<31)) != 0) {
q2,q1,q0 = lsl96(s2, s1, s0)
2019-12-17 01:07:13 +03:00
binexp--
s2 = q2
s1 = q1
s0 = q0
}
2020-05-22 18:36:09 +03:00
q2 = s2 / c_ten
r1 = s2 % c_ten
2019-12-20 00:29:37 +03:00
r2 = (s1>>8) | (r1<<24)
2020-05-22 18:36:09 +03:00
q1 = r2 / c_ten
r1 = r2 % c_ten
2019-12-20 00:29:37 +03:00
r2 = ((s1 & u32(0xFF))<<16) | (s0>>16) | (r1<<24)
2020-05-22 18:36:09 +03:00
r0 = r2 / c_ten
r1 = r2 % c_ten
2019-12-20 00:29:37 +03:00
q1 = (q1<<8) | ((r0 & u32(0x00FF0000))>>16)
q0 = r0<<16
r2 = (s0 & u32(0xFFFF)) | (r1<<16)
2020-05-22 18:36:09 +03:00
q0 |= r2 / c_ten
2019-12-17 01:07:13 +03:00
s2 = q2
s1 = q1
s0 = q0
pn.exponent++
}
2019-12-20 00:29:37 +03:00
// C.printf("mantissa before normalization: %08x%08x%08x binexp: %d \n", s2,s1,s0,binexp)
2019-12-17 01:07:13 +03:00
// normalization, the 28 bit in s2 must the leftest one in the variable
2019-12-20 00:29:37 +03:00
if s2 != 0 || s1 != 0 || s0 != 0 {
2019-12-17 01:47:30 +03:00
for (s2 & mask28) == 0 {
2019-12-20 00:29:37 +03:00
q2,q1,q0 = lsl96(s2, s1, s0)
2019-12-17 01:07:13 +03:00
binexp--
s2 = q2
s1 = q1
s0 = q0
}
}
// rounding if needed
/*
* "round half to even" algorithm
* Example for f32, just a reminder
*
* If bit 54 is 0, round down
* If bit 54 is 1
* If any bit beyond bit 54 is 1, round up
* If all bits beyond bit 54 are 0 (meaning the number is halfway between two floating-point numbers)
* If bit 53 is 0, round down
* If bit 53 is 1, round up
*/
/* test case 1 complete
s2=0x1FFFFFFF
s1=0xFFFFFF80
s0=0x0
*/
/* test case 1 check_round_bit
s2=0x18888888
s1=0x88888880
s0=0x0
*/
/* test case check_round_bit + normalization
s2=0x18888888
s1=0x88888F80
s0=0x0
*/
2019-12-20 00:29:37 +03:00
// C.printf("mantissa before rounding: %08x%08x%08x binexp: %d \n", s2,s1,s0,binexp)
2019-12-17 01:07:13 +03:00
// s1 => 0xFFFFFFxx only F are rapresented
nbit := 7
2019-12-20 00:29:37 +03:00
check_round_bit := u32(1)<<u32(nbit)
check_round_mask := u32(0xFFFFFFFF)<<u32(nbit)
2019-12-17 01:07:13 +03:00
if (s1 & check_round_bit) != 0 {
2019-12-20 00:29:37 +03:00
// C.printf("need round!! cehck mask: %08x\n", s1 & ~check_round_mask )
2019-12-17 01:07:13 +03:00
if (s1 & ~check_round_mask) != 0 {
2019-12-20 00:29:37 +03:00
// C.printf("Add 1!\n")
s2,s1,s0 = add96(s2, s1, s0, 0, check_round_bit, 0)
}
else {
// C.printf("All 0!\n")
if (s1 & (check_round_bit<<u32(1))) != 0 {
// C.printf("Add 1 form -1 bit control!\n")
s2,s1,s0 = add96(s2, s1, s0, 0, check_round_bit, 0)
2019-12-17 01:07:13 +03:00
}
}
s1 = s1 & check_round_mask
s0 = u32(0)
2019-12-22 01:38:02 +03:00
// recheck normalization
if s2 & (mask28<<u32(1)) != 0 {
// C.printf("Renormalize!!")
q2,q1,q0 = lsr96(s2, s1, s0)
binexp--
s2 = q2
s1 = q1
s0 = q0
}
2019-12-17 01:07:13 +03:00
}
2019-12-20 00:29:37 +03:00
// tmp := ( u64(s2 & ~mask28) << 24) | ((u64(s1) + u64(128)) >> 8)
// C.printf("mantissa after rounding : %08x%08x%08x binexp: %d \n", s2,s1,s0,binexp)
// C.printf("Tmp result: %016x\n",tmp)
2019-12-17 01:07:13 +03:00
// end rounding
// offset the binary exponent IEEE 754
binexp += 1023
2019-12-17 01:47:30 +03:00
if binexp > 2046 {
2019-12-17 01:07:13 +03:00
if pn.negative {
2020-05-22 18:36:09 +03:00
result = double_minus_infinity
2019-12-20 00:29:37 +03:00
}
else {
2020-05-22 18:36:09 +03:00
result = double_plus_infinity
2019-12-17 01:07:13 +03:00
}
}
else if binexp < 1 {
if pn.negative {
2020-05-22 18:36:09 +03:00
result = double_minus_zero
2019-12-22 01:41:42 +03:00
}
else {
2020-05-22 18:36:09 +03:00
result = double_plus_zero
2019-12-17 01:07:13 +03:00
}
}
else if s2 != 0 {
mut q := u64(0)
2019-12-20 00:29:37 +03:00
binexs2 := u64(binexp)<<52
q = (u64(s2 & ~mask28)<<24) | ((u64(s1) + u64(128))>>8) | binexs2
2019-12-17 01:07:13 +03:00
if pn.negative {
2019-12-20 00:29:37 +03:00
q |= (u64(1)<<63)
2019-12-17 01:07:13 +03:00
}
result = q
}
return result
}
/*
Public functions
*/
2019-12-20 00:29:37 +03:00
2019-12-17 01:07:13 +03:00
// atof64 return a f64 from a string doing a parsing operation
pub fn atof64(s string) f64 {
2019-12-20 00:29:37 +03:00
mut pn := PrepNumber{
}
2019-12-17 01:07:13 +03:00
mut res_parsing := 0
mut res := Float64u{}
2019-12-20 00:29:37 +03:00
res_parsing,pn = parser(s + ' ') // TODO: need an extra char for now
// println(pn)
2019-12-17 01:07:13 +03:00
match res_parsing {
parser_ok {
res.u = converter(mut pn)
2019-12-17 01:07:13 +03:00
}
parser_pzero {
2020-05-22 18:36:09 +03:00
res.u = double_plus_zero
2019-12-17 01:07:13 +03:00
}
parser_mzero {
2020-05-22 18:36:09 +03:00
res.u = double_minus_zero
2019-12-17 01:07:13 +03:00
}
parser_pinf {
2020-05-22 18:36:09 +03:00
res.u = double_plus_infinity
2019-12-17 01:07:13 +03:00
}
parser_minf {
2020-05-22 18:36:09 +03:00
res.u = double_minus_infinity
2019-12-17 01:07:13 +03:00
}
2019-12-20 00:29:37 +03:00
else {
}}
return res.f
2019-12-17 01:07:13 +03:00
}