1
0
mirror of https://github.com/vlang/v.git synced 2023-08-10 21:13:21 +03:00

strconv: new faster atof function (#9903)

This commit is contained in:
penguindark 2021-04-28 06:42:22 +02:00 committed by GitHub
parent faf2656335
commit a2014f86b7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,9 +1,9 @@
module strconv module strconv
/*
/*
atof util atof util
Copyright (c) 2019 Dario Deledda. All rights reserved. Copyright (c) 2019-2021 Dario Deledda. All rights reserved.
Use of this source code is governed by an MIT license Use of this source code is governed by an MIT license
that can be found in the LICENSE file. that can be found in the LICENSE file.
@ -20,7 +20,6 @@ Original license: MIT
96 bit operation utilities 96 bit operation utilities
Note: when u128 becomes available these functions can be refactored Note: when u128 becomes available these functions can be refactored
*/ */
// right logical shift 96 bit // right logical shift 96 bit
@ -80,12 +79,9 @@ fn sub96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32,u32,u32) {
} }
/* /*
Constants Constants
*/ */
const ( const (
// //
// f64 constants // f64 constants
@ -96,19 +92,6 @@ const (
double_plus_infinity = u64(0x7FF0000000000000) double_plus_infinity = u64(0x7FF0000000000000)
double_minus_infinity = u64(0xFFF0000000000000) double_minus_infinity = u64(0xFFF0000000000000)
// //
// parser state machine states
//
fsm_a = 0
fsm_b = 1
fsm_c = 2
fsm_d = 3
fsm_e = 4
fsm_f = 5
fsm_g = 6
fsm_h = 7
fsm_i = 8
fsm_stop = 9
//
// Possible parser return values. // Possible parser return values.
// //
parser_ok = 0 // parser finished OK parser_ok = 0 // parser finished OK
@ -127,15 +110,14 @@ const (
c_nine = `9` c_nine = `9`
c_ten = u32(10) c_ten = u32(10)
) )
/* /*
Utility Utility
*/ */
// NOTE: Modify these if working with non-ASCII encoding // NOTE: Modify these if working with non-ASCII encoding
fn is_digit(x byte) bool { fn is_digit(x byte) bool {
return (x >= c_zero && x <= c_nine) == true return (x >= strconv.c_zero && x <= strconv.c_nine) == true
} }
fn is_space(x byte) bool { fn is_space(x byte) bool {
@ -147,212 +129,115 @@ fn is_exp(x byte) bool {
} }
/* /*
Support struct Support struct
*/ */
/* /*
String parser String parser
NOTE: #TOFIX need one char after the last char of the number NOTE: #TOFIX need one char after the last char of the number
*/ */
// parser returns a support struct with all the parsing information for the converter
fn parser(s string) (int, PrepNumber) { fn parser(s string) (int, PrepNumber) {
mut state := fsm_a
mut digx := 0 mut digx := 0
mut c := byte(` `) // initial value for kicking off the state machine mut result := strconv.parser_ok
mut result := parser_ok
mut expneg := false mut expneg := false
mut expexp := 0 mut expexp := 0
mut i := 0 mut i := 0
mut pn := PrepNumber{ mut pn := PrepNumber{}
}
for state != fsm_stop { // skip spaces
match state { for i < s.len && s[i].is_space() {
// skip starting spaces
fsm_a {
if is_space(c) == true {
c = s[i]
i++ i++
} }
else {
state = fsm_b // check negatives
} if s[i] == `-` {
}
// check for the sign or point
fsm_b {
state = fsm_c
if c == c_plus {
c = s[i]
i++
}
else if c == c_minus {
pn.negative = true pn.negative = true
c = s[i]
i++ i++
} }
else if is_digit(c) {
} // positive sign ignore it
else if c == c_dpoint { if s[i] == `+` {
}
else {
state = fsm_stop
}
}
// skip the initial zeros
fsm_c {
if c == c_zero {
c = s[i]
i++ i++
} }
else if c == c_dpoint {
c = s[i] // read mantissa
i++ for i < s.len && s[i].is_digit() {
state = fsm_d // println("$i => ${s[i]}")
} if digx < strconv.digits {
else {
state = fsm_e
}
}
// reading leading zeros in the fractional part of mantissa
fsm_d {
if c == c_zero {
c = s[i]
i++
if pn.exponent > -2147483647 {
pn.exponent--
}
}
else {
state = fsm_f
}
}
// reading integer part of mantissa
fsm_e {
if is_digit(c) {
if digx < digits {
pn.mantissa *= 10 pn.mantissa *= 10
pn.mantissa += u64(c - c_zero) pn.mantissa += u64(s[i] - strconv.c_zero)
digx++ digx++
} } else if pn.exponent < 2147483647 {
else if pn.exponent < 2147483647 {
pn.exponent++ pn.exponent++
} }
c = s[i]
i++ i++
} }
else if c == c_dpoint {
c = s[i] // read mantissa decimals
if (i < s.len) && (s[i] == `.`) {
i++ i++
state = fsm_f for i < s.len && s[i].is_digit() {
} if digx < strconv.digits {
else {
state = fsm_f
}
}
// reading fractional part of mantissa
fsm_f {
if is_digit(c) {
if digx < digits {
pn.mantissa *= 10 pn.mantissa *= 10
pn.mantissa += u64(c - c_zero) pn.mantissa += u64(s[i] - strconv.c_zero)
pn.exponent-- pn.exponent--
digx++ digx++
} }
c = s[i]
i++ i++
} }
else if is_exp(c) { }
c = s[i]
// read exponent
if (i < s.len) && ((s[i] == `e`) || (s[i] == `E`)) {
i++ i++
state = fsm_g if i < s.len {
} // exponent sign
else { if s[i] == strconv.c_plus {
state = fsm_g
}
}
// reading sign of exponent
fsm_g {
if c == c_plus {
c = s[i]
i++ i++
} } else if s[i] == strconv.c_minus {
else if c == c_minus {
expneg = true expneg = true
c = s[i]
i++ i++
} }
state = fsm_h
} for i < s.len && s[i].is_digit() {
// skipping leading zeros of exponent
fsm_h {
if c == c_zero {
c = s[i]
i++
}
else {
state = fsm_i
}
}
// reading exponent digits
fsm_i {
if is_digit(c) {
if expexp < 214748364 { if expexp < 214748364 {
expexp *= 10 expexp *= 10
expexp += int(c - c_zero) expexp += int(s[i] - strconv.c_zero)
} }
c = s[i]
i++ i++
} }
else {
state = fsm_stop
}
}
else {
}}
// C.printf("len: %d i: %d str: %s \n",s.len,i,s[..i])
if i >= s.len {
state = fsm_stop
} }
} }
if expneg { if expneg {
expexp = -expexp expexp = -expexp
} }
pn.exponent += expexp pn.exponent += expexp
if pn.mantissa == 0 { if pn.mantissa == 0 {
if pn.negative { if pn.negative {
result = parser_mzero result = strconv.parser_mzero
} else {
result = strconv.parser_pzero
} }
else { } else if pn.exponent > 309 {
result = parser_pzero
}
}
else if pn.exponent > 309 {
if pn.negative { if pn.negative {
result = parser_minf result = strconv.parser_minf
} else {
result = strconv.parser_pinf
} }
else { } else if pn.exponent < -328 {
result = parser_pinf
}
}
else if pn.exponent < -328 {
if pn.negative { if pn.negative {
result = parser_mzero result = strconv.parser_mzero
} } else {
else { result = strconv.parser_pzero
result = parser_pzero
} }
} }
return result, pn return result, pn
} }
/* /*
Converter to the bit form of the f64 number Converter to the bit form of the f64 number
*/ */
// converter returns a u64 with the bit image of the f64 number // converter returns a u64 with the bit image of the f64 number
@ -396,18 +281,18 @@ fn converter(mut pn PrepNumber) u64 {
s1 = q1 s1 = q1
s0 = q0 s0 = q0
} }
q2 = s2 / c_ten q2 = s2 / strconv.c_ten
r1 = s2 % c_ten r1 = s2 % strconv.c_ten
r2 = (s1 >> 8) | (r1 << 24) r2 = (s1 >> 8) | (r1 << 24)
q1 = r2 / c_ten q1 = r2 / strconv.c_ten
r1 = r2 % c_ten r1 = r2 % strconv.c_ten
r2 = ((s1 & u32(0xFF)) << 16) | (s0 >> 16) | (r1 << 24) r2 = ((s1 & u32(0xFF)) << 16) | (s0 >> 16) | (r1 << 24)
r0 = r2 / c_ten r0 = r2 / strconv.c_ten
r1 = r2 % c_ten r1 = r2 % strconv.c_ten
q1 = (q1 << 8) | ((r0 & u32(0x00FF0000)) >> 16) q1 = (q1 << 8) | ((r0 & u32(0x00FF0000)) >> 16)
q0 = r0 << 16 q0 = r0 << 16
r2 = (s0 & u32(0xFFFF)) | (r1 << 16) r2 = (s0 & u32(0xFFFF)) | (r1 << 16)
q0 |= r2 / c_ten q0 |= r2 / strconv.c_ten
s2 = q2 s2 = q2
s1 = q1 s1 = q1
s0 = q0 s0 = q0
@ -436,19 +321,22 @@ fn converter(mut pn PrepNumber) u64 {
* If bit 53 is 0, round down * If bit 53 is 0, round down
* If bit 53 is 1, round up * If bit 53 is 1, round up
*/ */
/* test case 1 complete /*
test case 1 complete
s2=0x1FFFFFFF s2=0x1FFFFFFF
s1=0xFFFFFF80 s1=0xFFFFFF80
s0=0x0 s0=0x0
*/ */
/* test case 1 check_round_bit /*
test case 1 check_round_bit
s2=0x18888888 s2=0x18888888
s1=0x88888880 s1=0x88888880
s0=0x0 s0=0x0
*/ */
/* test case check_round_bit + normalization /*
test case check_round_bit + normalization
s2=0x18888888 s2=0x18888888
s1=0x88888F80 s1=0x88888F80
s0=0x0 s0=0x0
@ -464,8 +352,7 @@ fn converter(mut pn PrepNumber) u64 {
if (s1 & ~check_round_mask) != 0 { if (s1 & ~check_round_mask) != 0 {
// C.printf("Add 1!\n") // C.printf("Add 1!\n")
s2, s1, s0 = add96(s2, s1, s0, 0, check_round_bit, 0) s2, s1, s0 = add96(s2, s1, s0, 0, check_round_bit, 0)
} } else {
else {
// C.printf("All 0!\n") // C.printf("All 0!\n")
if (s1 & (check_round_bit << u32(1))) != 0 { if (s1 & (check_round_bit << u32(1))) != 0 {
// C.printf("Add 1 form -1 bit control!\n") // C.printf("Add 1 form -1 bit control!\n")
@ -492,21 +379,17 @@ fn converter(mut pn PrepNumber) u64 {
binexp += 1023 binexp += 1023
if binexp > 2046 { if binexp > 2046 {
if pn.negative { if pn.negative {
result = double_minus_infinity result = strconv.double_minus_infinity
} else {
result = strconv.double_plus_infinity
} }
else { } else if binexp < 1 {
result = double_plus_infinity
}
}
else if binexp < 1 {
if pn.negative { if pn.negative {
result = double_minus_zero result = strconv.double_minus_zero
} else {
result = strconv.double_plus_zero
} }
else { } else if s2 != 0 {
result = double_plus_zero
}
}
else if s2 != 0 {
mut q := u64(0) mut q := u64(0)
binexs2 := u64(binexp) << 52 binexs2 := u64(binexp) << 52
q = (u64(s2 & ~mask28) << 24) | ((u64(s1) + u64(128)) >> 8) | binexs2 q = (u64(s2 & ~mask28) << 24) | ((u64(s1) + u64(128)) >> 8) | binexs2
@ -519,38 +402,33 @@ fn converter(mut pn PrepNumber) u64 {
} }
/* /*
Public functions Public functions
*/ */
// atof64 returns an f64 from a string doing a parsing operation // atof64 returns an f64 from a string doing a parsing operation
pub fn atof64(s string) f64 { pub fn atof64(s string) f64 {
mut pn := PrepNumber{ mut pn := PrepNumber{}
}
mut res_parsing := 0 mut res_parsing := 0
mut res := Float64u{} mut res := Float64u{}
res_parsing,pn = parser(s + ' ') // TODO: need an extra char for now res_parsing, pn = parser(s)
// println(pn)
match res_parsing { match res_parsing {
parser_ok { strconv.parser_ok {
res.u = converter(mut pn) res.u = converter(mut pn)
} }
parser_pzero { strconv.parser_pzero {
res.u = double_plus_zero res.u = strconv.double_plus_zero
} }
parser_mzero { strconv.parser_mzero {
res.u = double_minus_zero res.u = strconv.double_minus_zero
} }
parser_pinf { strconv.parser_pinf {
res.u = double_plus_infinity res.u = strconv.double_plus_infinity
} }
parser_minf { strconv.parser_minf {
res.u = double_minus_infinity res.u = strconv.double_minus_infinity
}
else {
} }
else {}
} }
return unsafe { res.f } return unsafe { res.f }
} }