1
0
mirror of https://github.com/vlang/v.git synced 2023-08-10 21:13:21 +03:00

encoding: add base58 support (#11288)

This commit is contained in:
Adam Oates 2021-08-23 22:22:46 -05:00 committed by GitHub
parent d78e7e3b2b
commit 83e2a84c21
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 335 additions and 0 deletions

View File

@ -0,0 +1,65 @@
module base58
// alphabets is a map of common base58 alphabets, keyed by name.
// It is filled once by init_alphabets(), which registers the
// 'btc', 'flickr' and 'ripple' alphabets.
pub const alphabets = init_alphabets()
// init_alphabets builds the preconfigured `Alphabet`s and returns them
// as a `map[string]Alphabet` keyed by alphabet name.
// It exists as a workaround: per the original author, assigning the map
// literal directly to the `alphabets` const caused a C error at the time.
fn init_alphabets() map[string]Alphabet {
	// name -> the 58 characters of that alphabet, in digit order
	charsets := {
		'btc':    '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
		'flickr': '123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ'
		'ripple': 'rpshnaf39wBUDNEGHJKLM4PQRST7VWXYZ2bcdeCg65jkm8oFqi1tuvAxyz'
	}
	mut table := map[string]Alphabet{}
	for name, chars in charsets {
		// the built-in character sets are expected to be valid, so a
		// failure here is treated as a programming error
		table[name] = new_alphabet(chars) or {
			panic(@MOD + '.' + @FN + ': this should never happen')
		}
	}
	return table
}
// Alphabet is the series of characters that an input
// will be encoded to and a decode table.
struct Alphabet {
mut:
// decode has 128 entries, all initialised to -1 ("not in the alphabet");
// new_alphabet stores each character's position at the index given by
// the character's byte value.
decode []i8 = []i8{len: 128, init: -1}
// encode holds the 58 alphabet characters; the entry at index n is the
// character used for base58 digit n.
encode []byte = []byte{len: 58}
}
// str returns the 58 characters of the Alphabet's encode table as a string
pub fn (alphabet Alphabet) str() string {
	chars := alphabet.encode
	return chars.bytestr()
}
// new_alphabet instantiates an Alphabet object based on
// the provided characters.
// `str` must consist of exactly 58 distinct ASCII characters; the character
// at position n becomes base58 digit n.
// An error is returned when the length is not 58, when a character is
// outside the ASCII range, or when a character is repeated.
pub fn new_alphabet(str string) ?Alphabet {
	if str.len != 58 {
		return error(@MOD + '.' + @FN + ': string must be 58 characters in length')
	}
	mut ret := Alphabet{}
	for i, b in str {
		// the decode table only has 128 slots, so a byte above 127 would
		// index out of range; report it instead of crashing
		if b > 127 {
			return error(@MOD + '.' + @FN + ': string must contain only ASCII characters')
		}
		// a slot that is no longer -1 was already claimed by an earlier character
		if ret.decode[b] != -1 {
			return error(@MOD + '.' + @FN + ': string must not contain repeating characters')
		}
		ret.encode[i] = b
		ret.decode[b] = i8(i)
	}
	return ret
}

View File

@ -0,0 +1,181 @@
// algorithm is adapted from https://github.com/mr-tron/base58 under the MIT license
module base58
import math
// encode_int encodes a positive `int` to a base58 string using the Bitcoin alphabet.
// It forwards to encode_int_walpha and returns whatever error that function reports.
pub fn encode_int(input int) ?string {
	btc := alphabets['btc']
	return encode_int_walpha(input, btc)
}
// encode_int_walpha encodes a positive `int` to a base58 string using a custom alphabet.
// An error is returned when `input` is zero or negative.
pub fn encode_int_walpha(input int, alphabet Alphabet) ?string {
	if input <= 0 {
		return error(@MOD + '.' + @FN + ': input must be greater than zero')
	}
	// digits are produced least-significant first ...
	mut digits := []byte{}
	for n := input; n > 0; n /= 58 {
		// n % 58 is always in 0..57, i.e. a valid index into the encode table
		digits << alphabet.encode[n % 58]
	}
	// ... so flip them to get the most-significant digit in front
	return digits.reverse().bytestr()
}
// encode encodes the bytes of `input` to base58 using the Bitcoin alphabet
pub fn encode(input string) string {
	btc := alphabets['btc']
	return encode_walpha(input, btc)
}
// encode_walpha encodes the bytes of `input` to base58 using a custom alphabet.
// An empty input yields an empty string. Each leading zero byte of the input
// is represented by one leading "zero digit" (alphabet.encode[0]) in the output.
pub fn encode_walpha(input string, alphabet Alphabet) string {
	if input.len == 0 {
		return ''
	}
	bin := input.bytes()
	mut sz := bin.len
	// count the leading zero bytes; they are carried over one-to-one
	mut zcount := 0
	for zcount < sz && bin[zcount] == 0 {
		zcount++
	}
	// It is crucial to make this as short as possible, especially for
	// the usual case of Bitcoin addresses.
	// 555 / 406 is an integer simplification of ceil(log(256)/log(58))
	sz = zcount + (sz - zcount) * 555 / 406 + 1
	mut out := []byte{len: sz}
	mut i := 0
	mut carry := u32(0)
	// high is the index just above the most significant digit written so far
	mut high := sz - 1
	for b in bin {
		i = sz - 1
		// multiply the number held in out[] by 256 and add b, walking from
		// the least significant digit up to the highest digit in use
		for carry = u32(b); i > high || carry != 0; i-- {
			carry = carry + 256 * u32(out[i])
			out[i] = byte(carry % 58)
			carry /= 58
		}
		// remember how far this pass reached; using a fixed value here would
		// let a later pass stop early and skip digits that are already non-zero
		high = i
	}
	// determine additional "zero-gap" in the buffer, aside from zcount
	for i = zcount; i < sz && out[i] == 0; i++ {}
	// now encode the values with actual alphabet in-place
	val := out[i - zcount..]
	sz = val.len
	for i = 0; i < sz; i++ {
		out[i] = alphabet.encode[val[i]]
	}
	return out[..sz].bytestr()
}
// decode_int decodes a base58 string to an integer using the Bitcoin alphabet.
// It forwards to decode_int_walpha and returns whatever error that function reports.
pub fn decode_int(input string) ?int {
	btc := alphabets['btc']
	return decode_int_walpha(input, btc)
}
// decode_int_walpha decodes base58 string to an integer with custom alphabet.
// An empty `input` decodes to 0.
// An error is returned when `input` contains a character that is not part of
// `alphabet`, or when the decoded value does not fit in an `int`.
pub fn decode_int_walpha(input string, alphabet Alphabet) ?int {
	// accumulate in 64 bits so that overflow of the 32-bit result can be detected
	mut total := i64(0)
	for ch in input {
		// the decode table has 128 entries, so bytes above 127 cannot be looked up
		if ch > 127 || alphabet.decode[ch] == -1 {
			return error(@MOD + '.' + @FN +
				': input string contains values not found in the provided alphabet')
		}
		// Horner's scheme: shift the running value by one base58 digit and add the new one
		total = total * 58 + i64(alphabet.decode[ch])
		if total > math.max_i32 {
			return error(@MOD + '.' + @FN + ': input string does not fit in an int')
		}
	}
	return int(total)
}
// decode decodes a base58 string using the Bitcoin alphabet.
// It forwards to decode_walpha and returns whatever error that function reports.
pub fn decode(str string) ?string {
	btc := alphabets['btc']
	return decode_walpha(str, btc)
}
// decode_walpha decodes base58 string using custom alphabet.
// An empty `str` decodes to an empty string. Each leading "zero digit"
// (alphabet.encode[0]) of `str` becomes one leading zero byte in the result.
// An error is returned when `str` contains a byte outside the ASCII range or
// a character that is not part of `alphabet`.
pub fn decode_walpha(str string, alphabet Alphabet) ?string {
	if str.len == 0 {
		return ''
	}
	zero := alphabet.encode[0]
	b58sz := str.len
	// count the leading zero digits
	mut zcount := 0
	for i := 0; i < b58sz && str[i] == zero; i++ {
		zcount++
	}
	mut t := u64(0)
	mut c := u64(0)
	// the 32-bit algorithm stretches the result up to 2x
	mut binu := []byte{len: 2 * ((b58sz * 406 / 555) + 1)}
	mut outi := []u32{len: (b58sz + 3) / 4}
	for r in str {
		// the decode table has 128 entries; a byte above 127 can come from any
		// non-ASCII input, so it is reported as an error rather than a panic
		if r > 127 {
			return error(@MOD + '.' + @FN +
				': high-bit set on invalid digit; outside of ascii range ($r)')
		}
		if alphabet.decode[r] == -1 {
			return error(@MOD + '.' + @FN + ': invalid base58 digit ($r)')
		}
		c = u64(alphabet.decode[r])
		// multiply the big number held in outi by 58 and add the new digit,
		// propagating the carry from the least significant word upwards
		for j := outi.len - 1; j >= 0; j-- {
			t = u64(outi[j]) * 58 + c
			c = t >> 32
			outi[j] = u32(t & 0xffffffff)
		}
	}
	// initial mask depend on b58sz, on further loops it always starts at 24 bits
	mut mask := (u32(b58sz % 4) * 8)
	if mask == 0 {
		mask = 32
	}
	mask -= 8
	mut out_len := 0
	for j := 0; j < outi.len; j++ {
		// loop relies on u32 overflow: subtracting 8 from 0 wraps around to a
		// value >= 32, which ends the loop
		for mask < 32 {
			binu[out_len] = byte(outi[j] >> mask)
			mask -= 8
			out_len++
		}
		mask = 24
	}
	// find the most significant byte post-decode, if any
	for msb := zcount; msb < binu.len; msb++ {
		if binu[msb] > 0 {
			// keep zcount zero bytes in front of the first significant byte
			return binu[msb - zcount..out_len].bytestr()
		}
	}
	// it's all zeroes
	return binu[..out_len].bytestr()
}

View File

@ -0,0 +1,89 @@
module base58
// main runs every test in this file once.
// A test that returns an error aborts the run with that error instead of
// being silently ignored.
fn main() {
	test_encode_int() or { panic(err) }
	test_decode_int() or { panic(err) }
	test_encode_string()
	test_decode_string() or { panic(err) }
	test_fails() or { panic(err) }
}
// test_encode_int checks integer encoding with the Bitcoin alphabet and then
// runs the custom-alphabet variant of the test.
fn test_encode_int() ? {
	// 0x24 (36) is digit 'd' in the Bitcoin alphabet
	encoded := encode_int(0x24) ?
	assert encoded == 'd'
	test_encode_int_walpha() ?
}
// test_encode_int_walpha checks integer encoding with a custom alphabet
fn test_encode_int_walpha() ? {
	// arbitrary 58-character alphabet used only by the tests
	custom := new_alphabet('abcdefghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUV') or {
		panic(@MOD + '.' + @FN + ': this should never happen')
	}
	// 0x24 (36) is digit '_' in the custom alphabet
	encoded := encode_int_walpha(0x24, custom) ?
	assert encoded == '_'
}
// test_decode_int checks integer decoding with the Bitcoin alphabet and then
// runs the custom-alphabet variant of the test.
fn test_decode_int() ? {
	// 'd' is digit 36 (0x24) in the Bitcoin alphabet
	decoded := decode_int('d') ?
	assert decoded == 0x24
	test_decode_int_walpha() ?
}
// test_decode_int_walpha checks integer decoding with a custom alphabet
fn test_decode_int_walpha() ? {
	// arbitrary 58-character alphabet used only by the tests
	custom := new_alphabet('abcdefghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUV') or {
		panic(@MOD + '.' + @FN + ': this should never happen')
	}
	// '_' is digit 36 (0x24) in the custom alphabet
	decoded := decode_int_walpha('_', custom) ?
	assert decoded == 0x24
}
// test_encode_string checks string encoding with the Bitcoin alphabet and
// with a custom alphabet.
fn test_encode_string() {
	plain := 'lorem ipsum'
	// expected Bitcoin-alphabet encoding
	btc_encoded := encode(plain)
	assert btc_encoded == 'TtaR6twpTGu8VpY'
	// arbitrary 58-character alphabet used only by the tests
	custom := new_alphabet('abcdefghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUV') or {
		panic(@MOD + '.' + @FN + ': this should never happen')
	}
	// expected encoding of the same text with the custom alphabet
	custom_encoded := encode_walpha(plain, custom)
	assert custom_encoded == '0P7yfPSL0pQh2L5'
}
// test_decode_string checks string decoding with the Bitcoin alphabet and
// with a custom alphabet.
fn test_decode_string() ? {
	// Bitcoin-alphabet encoding of 'lorem ipsum'
	btc_decoded := decode('TtaR6twpTGu8VpY') ?
	assert btc_decoded == 'lorem ipsum'
	// arbitrary 58-character alphabet used only by the tests
	custom := new_alphabet('abcdefghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUV') or {
		panic(@MOD + '.' + @FN + ': this should never happen')
	}
	// custom-alphabet encoding of the same text
	custom_decoded := decode_walpha('0P7yfPSL0pQh2L5', custom) ?
	assert custom_decoded == 'lorem ipsum'
}
// test_fails verifies that invalid inputs are rejected: each call below is
// expected to return an error, and the test itself returns an error as soon
// as one of them unexpectedly succeeds.
fn test_fails() ? {
	// a negative value and zero must both be refused by encode_int
	for bad in [-238, 0] {
		if z := encode_int(bad) {
			return error(@MOD + '.' + @FN + ': expected encode_int to fail, got $z')
		}
	}
	// '!' is not part of the Bitcoin alphabet
	invalid := '!'
	if z := decode_int(invalid) {
		return error(@MOD + '.' + @FN + ': expected decode_int to fail, got $z')
	}
	if z := decode(invalid) {
		return error(@MOD + '.' + @FN + ': expected decode to fail, got $z')
	}
	// repeating character
	if abc := new_alphabet('aaaaafghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUV') {
		return error(@MOD + '.' + @FN + ': expected new_alphabet to fail, got $abc')
	}
	// more than 58 characters long
	if abc := new_alphabet('abcdefghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUVWXYZ') {
		return error(@MOD + '.' + @FN + ': expected new_alphabet to fail, got $abc')
	}
}