// vlib/strconv/f64_str.c.v (mirror of https://github.com/vlang/v.git)

module strconv
/*=============================================================================

f64 to string

Copyright (c) 2019-2023 Dario Deledda. All rights reserved.
Use of this source code is governed by an MIT license
that can be found in the LICENSE file.

This file contains the f64 to string functions.

These functions are based on the work of:
Ulf Adams, "Ryū: fast float-to-string conversion",
PLDI 2018: Proceedings of the 39th ACM SIGPLAN Conference on
Programming Language Design and Implementation, June 2018,
pages 270–282. https://doi.org/10.1145/3192366.3192369

Inspired by the Go version here:
https://github.com/cespare/ryu/tree/ba56a33f39e3bbbfa409095d0f9ae168a595feea

=============================================================================*/
// get_string_64 renders the decimal `d` (mantissa `d.m`, exponent `d.e`) in
// scientific notation: an optional `-`, one leading digit, `.`, the remaining
// mantissa digits, optional `0` padding, then `e`, an explicit exponent sign
// and at least two exponent digits.
//
// neg         - emit a leading `-`.
// i_n_digit   - digits requested after the dot; when the mantissa has more
//               than `i_n_digit + 1` digits it is rounded (see below).
// i_pad_digit - when `i_pad_digit + 1` exceeds the mantissa length, the
//               difference is appended as `0` characters before the exponent.
//
// NOTE(review): with `i_n_digit == 0` the function returns right after the
// trailing-digit loop, i.e. before the dot, the leading digit and the
// exponent are written - confirm that callers rely on this.
[direct_array_access]
fn (d Dec64) get_string_64(neg bool, i_n_digit int, i_pad_digit int) string {
	// Both counts include the single digit printed before the dot.
	mut n_digit := i_n_digit + 1
	pad_digit := i_pad_digit + 1
	mut out := d.m
	mut d_exp := d.e
	mut out_len := dec_digits(out)
	// The printed exponent is derived from the length before any rounding.
	out_len_original := out_len

	// Number of `0` characters to append after the mantissa digits.
	mut fw_zeros := 0
	if pad_digit > out_len {
		fw_zeros = pad_digit - out_len
	}

	// out_len mantissa digits + padding + 8 extra bytes:
	// sign + `.` + `e` + exponent sign + up to 3 exponent digits + `\0`.
	mut buf := []u8{len: (out_len + 6 + 1 + 1 + fw_zeros)}
	mut i := 0

	if neg {
		buf[i] = `-`
		i++
	}

	// A one-digit mantissa has no digits after the dot; `disp` makes the
	// trailing-digit loop below run zero times in that case.
	mut disp := 0
	if out_len <= 1 {
		disp = 1
	}

	// rounding last used digit
	if n_digit < out_len {
		out += ten_pow_table_64[out_len - n_digit - 1] * 5 // round to up
		out /= ten_pow_table_64[out_len - n_digit]
		// If rounding produced a larger value than plain truncation, bump the
		// exponent and keep one more digit. The table index is evaluated
		// before `n_digit` is incremented.
		if d.m / ten_pow_table_64[out_len - n_digit] < out {
			d_exp++
			n_digit++
		}
		out_len = n_digit
	}

	// Write the digits that follow the dot from right to left, the last one
	// landing at index `y`. The slots for the leading digit and the dot are
	// filled afterwards.
	y := i + out_len
	mut x := 0
	for x < (out_len - disp - 1) {
		buf[y - x] = `0` + u8(out % 10)
		out /= 10
		i++
		x++
	}

	// no decimal digits needed, end here
	if i_n_digit == 0 {
		unsafe {
			buf[i] = 0
			return tos(&u8(&buf[0]), i)
		}
	}

	if out_len >= 1 {
		buf[y - x] = `.`
		x++
		i++
	}

	// Leading digit, placed just before the dot.
	if y - x >= 0 {
		buf[y - x] = `0` + u8(out % 10)
		i++
	}

	for fw_zeros > 0 {
		buf[i] = `0`
		i++
		fw_zeros--
	}

	buf[i] = `e`
	i++

	mut exp := d_exp + out_len_original - 1
	if exp < 0 {
		buf[i] = `-`
		i++
		exp = -exp
	} else {
		buf[i] = `+`
		i++
	}

	// Always print at least two digits to match strconv's formatting.
	d2 := exp % 10
	exp /= 10
	d1 := exp % 10
	d0 := exp / 10
	if d0 > 0 {
		buf[i] = `0` + u8(d0)
		i++
	}
	buf[i] = `0` + u8(d1)
	i++
	buf[i] = `0` + u8(d2)
	i++
	// Zero terminator; it is not counted in the length passed to `tos`.
	buf[i] = 0

	return unsafe {
		tos(&u8(&buf[0]), i)
	}
}
// f64_to_decimal_exact_int tries the fast path for values that are exact
// integers. `i_mant` and `exp` are the raw mantissa and biased exponent fields
// of the f64. It returns the decimal and `true` on success; on failure it
// returns `false` and the caller must use the general conversion instead.
fn f64_to_decimal_exact_int(i_mant u64, exp u64) (Dec64, bool) {
	mut d := Dec64{}
	// `exp` is unsigned, so an exponent below the bias presumably wraps to a
	// huge value here and is rejected by the range check below - TODO confirm
	// that `bias64` does not promote this subtraction to a signed type.
	e := exp - bias64
	if e > mantbits64 {
		return d, false
	}
	shift := mantbits64 - e
	mant := i_mant | u64(0x0010_0000_0000_0000) // implicit 1 (bit 52)
	d.m = mant >> shift
	// Any bit lost by the shift means the value has a fractional part.
	if (d.m << shift) != mant {
		return d, false
	}
	// Move trailing decimal zeros from the mantissa into the exponent.
	// `d.m` is never 0 here because the implicit bit survives the shift.
	for (d.m % 10) == 0 {
		d.m /= 10
		d.e++
	}
	return d, true
}
// f64_to_decimal converts the raw mantissa (`mant`) and biased exponent
// (`exp`) fields of an f64 into a `Dec64` holding a decimal mantissa and a
// power-of-ten exponent. The steps below compute the interval of acceptable
// decimal values and then strip digits while the result stays inside it.
fn f64_to_decimal(mant u64, exp u64) Dec64 {
	// Step 1: Decode the fields into a binary mantissa m2 and exponent e2.
	mut e2 := 0
	mut m2 := u64(0)
	if exp == 0 {
		// We subtract 2 so that the bounds computation has
		// 2 additional bits.
		e2 = 1 - bias64 - int(mantbits64) - 2
		m2 = mant
	} else {
		e2 = int(exp) - bias64 - int(mantbits64) - 2
		m2 = (u64(1) << mantbits64) | mant
	}
	even := (m2 & 1) == 0
	accept_bounds := even

	// Step 2: Determine the interval of valid decimal representations.
	mv := u64(4 * m2)
	// 1 unless the mantissa field is zero and exp > 1.
	mm_shift := bool_to_u64(mant != 0 || exp <= 1)

	// Step 3: Convert to a decimal power base using 128-bit arithmetic.
	mut vr := u64(0)
	mut vp := u64(0)
	mut vm := u64(0)
	mut e10 := 0
	mut vm_is_trailing_zeros := false
	mut vr_is_trailing_zeros := false

	if e2 >= 0 {
		// This expression is slightly faster than max(0, log10Pow2(e2) - 1).
		q := log10_pow2(e2) - bool_to_u32(e2 > 3)
		e10 = int(q)
		k := pow5_inv_num_bits_64 + pow5_bits(int(q)) - 1
		i := -e2 + int(q) + k

		mul := pow5_inv_split_64[q]
		vr = mul_shift_64(u64(4) * m2, mul, i)
		vp = mul_shift_64(u64(4) * m2 + u64(2), mul, i)
		vm = mul_shift_64(u64(4) * m2 - u64(1) - mm_shift, mul, i)
		if q <= 21 {
			// This should use q <= 22, but I think 21 is also safe.
			// Smaller values may still be safe, but it's more
			// difficult to reason about them. Only one of mp, mv,
			// and mm can be a multiple of 5, if any.
			if mv % 5 == 0 {
				vr_is_trailing_zeros = multiple_of_power_of_five_64(mv, q)
			} else if accept_bounds {
				// Same as min(e2 + (^mm & 1), pow5Factor64(mm)) >= q
				// <=> e2 + (^mm & 1) >= q && pow5Factor64(mm) >= q
				// <=> true && pow5Factor64(mm) >= q, since e2 >= q.
				vm_is_trailing_zeros = multiple_of_power_of_five_64(mv - 1 - mm_shift,
					q)
			} else if multiple_of_power_of_five_64(mv + 2, q) {
				vp--
			}
		}
	} else {
		// This expression is slightly faster than max(0, log10Pow5(-e2) - 1).
		q := log10_pow5(-e2) - bool_to_u32(-e2 > 1)
		e10 = int(q) + e2
		i := -e2 - int(q)
		k := pow5_bits(i) - pow5_num_bits_64
		j := int(q) - k
		mul := pow5_split_64[i]
		vr = mul_shift_64(u64(4) * m2, mul, j)
		vp = mul_shift_64(u64(4) * m2 + u64(2), mul, j)
		vm = mul_shift_64(u64(4) * m2 - u64(1) - mm_shift, mul, j)
		if q <= 1 {
			// {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
			// mv = 4 * m2, so it always has at least two trailing 0 bits.
			vr_is_trailing_zeros = true
			if accept_bounds {
				// mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1.
				vm_is_trailing_zeros = (mm_shift == 1)
			} else {
				// mp = mv + 2, so it always has at least one trailing 0 bit.
				vp--
			}
		} else if q < 63 { // TODO(ulfjack/cespare): Use a tighter bound here.
			// We need to compute min(ntz(mv), pow5Factor64(mv) - e2) >= q - 1
			// <=> ntz(mv) >= q - 1 && pow5Factor64(mv) - e2 >= q - 1
			// <=> ntz(mv) >= q - 1 (e2 is negative and -e2 >= q)
			// <=> (mv & ((1 << (q - 1)) - 1)) == 0
			// We also need to make sure that the left shift does not overflow.
			vr_is_trailing_zeros = multiple_of_power_of_two_64(mv, q - 1)
		}
	}

	// Step 4: Find the shortest decimal representation
	// in the interval of valid representations.
	mut removed := 0
	mut last_removed_digit := u8(0)
	mut out := u64(0)
	// On average, we remove ~2 digits.
	if vm_is_trailing_zeros || vr_is_trailing_zeros {
		// General case, which happens rarely (~0.7%).
		for {
			vp_div_10 := vp / 10
			vm_div_10 := vm / 10
			if vp_div_10 <= vm_div_10 {
				break
			}
			vm_mod_10 := vm % 10
			vr_div_10 := vr / 10
			vr_mod_10 := vr % 10
			vm_is_trailing_zeros = vm_is_trailing_zeros && vm_mod_10 == 0
			// Uses the digit removed in the previous iteration, so this must
			// run before last_removed_digit is overwritten.
			vr_is_trailing_zeros = vr_is_trailing_zeros && (last_removed_digit == 0)
			last_removed_digit = u8(vr_mod_10)
			vr = vr_div_10
			vp = vp_div_10
			vm = vm_div_10
			removed++
		}
		if vm_is_trailing_zeros {
			for {
				vm_div_10 := vm / 10
				vm_mod_10 := vm % 10
				if vm_mod_10 != 0 {
					break
				}
				vp_div_10 := vp / 10
				vr_div_10 := vr / 10
				vr_mod_10 := vr % 10
				vr_is_trailing_zeros = vr_is_trailing_zeros && (last_removed_digit == 0)
				last_removed_digit = u8(vr_mod_10)
				vr = vr_div_10
				vp = vp_div_10
				vm = vm_div_10
				removed++
			}
		}
		if vr_is_trailing_zeros && (last_removed_digit == 5) && (vr % 2) == 0 {
			// Round even if the exact number is .....50..0.
			last_removed_digit = 4
		}
		out = vr
		// We need to take vr + 1 if vr is outside bounds
		// or we need to round up.
		if (vr == vm && (!accept_bounds || !vm_is_trailing_zeros)) || last_removed_digit >= 5 {
			out++
		}
	} else {
		// Specialized for the common case (~99.3%).
		// Percentages below are relative to this.
		mut round_up := false
		for vp / 100 > vm / 100 {
			// Optimization: remove two digits at a time (~86.2%).
			round_up = (vr % 100) >= 50
			vr /= 100
			vp /= 100
			vm /= 100
			removed += 2
		}
		// Loop iterations below (approximately), without optimization above:
		// 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%
		// Loop iterations below (approximately), with optimization above:
		// 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
		for vp / 10 > vm / 10 {
			round_up = (vr % 10) >= 5
			vr /= 10
			vp /= 10
			vm /= 10
			removed++
		}
		// We need to take vr + 1 if vr is outside bounds
		// or we need to round up.
		out = vr + bool_to_u64(vr == vm || round_up)
	}

	// Every removed digit raises the decimal exponent by one.
	return Dec64{
		m: out
		e: e10 + removed
	}
}
//=============================================================================
// String Functions
//=============================================================================
// f64_to_str returns `f` as a `string` in scientific notation with max `n_digit` digits after the dot.
// No zero padding is added when the value has fewer digits than requested.
pub fn f64_to_str(f f64, n_digit int) string {
	// Reinterpret the float's bits as a u64 through the union.
	mut u1 := Uf64{}
	u1.f = f
	u := unsafe { u1.u }

	// Split the bit pattern into sign, mantissa field and biased exponent field.
	neg := (u >> (mantbits64 + expbits64)) != 0
	mant := u & ((u64(1) << mantbits64) - u64(1))
	exp := (u >> mantbits64) & ((u64(1) << expbits64) - u64(1))

	// Exit early for easy cases: all-ones exponent, or exponent and mantissa both zero.
	if (exp == maxexp64) || (exp == 0 && mant == 0) {
		return get_string_special(neg, exp == 0, mant == 0)
	}

	// Try the exact-integer fast path first, then the general conversion.
	mut d, ok := f64_to_decimal_exact_int(mant, exp)
	if !ok {
		d = f64_to_decimal(mant, exp)
	}
	return d.get_string_64(neg, n_digit, 0)
}
// f64_to_str_pad returns `f` as a `string` in scientific notation with `n_digit` digits after the dot.
// When the value has fewer digits than requested, the mantissa is padded with trailing `0`s.
pub fn f64_to_str_pad(f f64, n_digit int) string {
	// Reinterpret the float's bits as a u64 through the union.
	mut u1 := Uf64{}
	u1.f = f
	u := unsafe { u1.u }

	// Split the bit pattern into sign, mantissa field and biased exponent field.
	neg := (u >> (mantbits64 + expbits64)) != 0
	mant := u & ((u64(1) << mantbits64) - u64(1))
	exp := (u >> mantbits64) & ((u64(1) << expbits64) - u64(1))

	// Exit early for easy cases: all-ones exponent, or exponent and mantissa both zero.
	if (exp == maxexp64) || (exp == 0 && mant == 0) {
		return get_string_special(neg, exp == 0, mant == 0)
	}

	// Try the exact-integer fast path first, then the general conversion.
	mut d, ok := f64_to_decimal_exact_int(mant, exp)
	if !ok {
		d = f64_to_decimal(mant, exp)
	}
	// Passing `n_digit` as the pad width is what turns on the zero padding.
	return d.get_string_64(neg, n_digit, n_digit)
}