mirror of
https://github.com/vlang/v.git
synced 2023-08-10 21:13:21 +03:00
hash: add wyhash + benchmark, add fnv1a, add u64.hex() (#3584)
This commit is contained in:
parent
5a2534122e
commit
007baa2305
25
thirdparty/wyhash/LICENSE
vendored
Normal file
25
thirdparty/wyhash/LICENSE
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
This is free and unencumbered software released into the public domain.
|
||||
|
||||
Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
distribute this software, either in source code form or as a compiled
|
||||
binary, for any purpose, commercial or non-commercial, and by any
|
||||
means.
|
||||
|
||||
In jurisdictions that recognize copyright laws, the author or authors
|
||||
of this software dedicate any and all copyright interest in the
|
||||
software to the public domain. We make this dedication for the benefit
|
||||
of the public at large and to the detriment of our heirs and
|
||||
successors. We intend this dedication to be an overt act of
|
||||
relinquishment in perpetuity of all present and future rights to this
|
||||
software under copyright law.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
For more information, please refer to <http://unlicense.org/>
|
||||
|
76
thirdparty/wyhash/wyhash.h
vendored
Normal file
76
thirdparty/wyhash/wyhash.h
vendored
Normal file
@ -0,0 +1,76 @@
|
||||
// Author: Wang Yi <godspeed_china@yeah.net>
|
||||
#ifndef wyhash_version_4
|
||||
#define wyhash_version_4
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#if defined(_MSC_VER) && defined(_M_X64)
|
||||
#include <intrin.h>
|
||||
#pragma intrinsic(_umul128)
|
||||
#endif
|
||||
// Mixing constants used by every wyhash primitive below.
// `static` gives each translation unit its own copy, so including this header
// from more than one .c file does not produce duplicate-symbol link errors.
static const uint64_t _wyp0=0xa0761d6478bd642full, _wyp1=0xe7037ed1a0b428dbull, _wyp2=0x8ebc6af09c88c6e3ull, _wyp3=0x589965cc75374cc3ull, _wyp4=0x1d8e4e27c47d124full;
|
||||
// Rotate the 64-bit value v right by k bits.
// k is reduced modulo 64 and the left-shift count is masked, so k == 0 no longer
// shifts by the full type width (undefined behaviour in C); results for k in 1..63 are unchanged.
static inline uint64_t _wyrotr(uint64_t v, unsigned k) { k&=63; return (v>>k)|(v<<((64-k)&63)); }
|
||||
// Multiply A by B and fold the result down to 64 bits.
// Default: the exact 128-bit product is computed (native 128-bit integer, the MSVC
// _umul128 intrinsic, or a portable 32x32 decomposition) and its high and low halves are XORed.
// With WYHASH32 defined, the four 32x32 partial products are combined with rotations instead.
static inline uint64_t _wymum(uint64_t A, uint64_t B) {
#if defined(WYHASH32)
  const uint64_t a_hi = A >> 32, b_hi = B >> 32;
  const unsigned a_lo = (unsigned)A, b_lo = (unsigned)B;
  const uint64_t hh = a_hi * b_hi;
  const uint64_t hl = a_hi * b_lo;
  const uint64_t lh = a_lo * b_hi;
  const uint64_t ll = (uint64_t)a_lo * b_lo;
  return _wyrotr(hl, 32) ^ _wyrotr(lh, 32) ^ hh ^ ll;
#elif defined(__SIZEOF_INT128__)
  const __uint128_t product = (__uint128_t)A * B;
  return (uint64_t)(product >> 64) ^ (uint64_t)product;
#elif defined(_MSC_VER) && defined(_M_X64)
  uint64_t high;
  const uint64_t low = _umul128(A, B, &high);
  return low ^ high;
#else
  // Schoolbook multiplication on 32-bit halves with explicit carry tracking.
  const uint64_t a_hi = A >> 32, b_hi = B >> 32;
  const uint64_t a_lo = (uint32_t)A, b_lo = (uint32_t)B;
  const uint64_t top = a_hi * b_hi;
  const uint64_t mid0 = a_hi * b_lo;
  const uint64_t mid1 = b_hi * a_lo;
  const uint64_t bottom = a_lo * b_lo;
  const uint64_t partial = bottom + (mid0 << 32);
  uint64_t carry = partial < bottom;
  const uint64_t low = partial + (mid1 << 32);
  carry += low < partial;
  const uint64_t high = top + (mid0 >> 32) + (mid1 >> 32) + carry;
  return high ^ low;
#endif
}
|
||||
// Byte-order detection. A caller may predefine WYHASH_LITTLE_ENDIAN; otherwise it is
// derived from compiler macros. If nothing matches it stays undefined, which the
// `#if` below evaluates as 0 (i.e. the byte-swapping / portable path is taken).
#ifndef WYHASH_LITTLE_ENDIAN
#if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define WYHASH_LITTLE_ENDIAN 1
#elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define WYHASH_LITTLE_ENDIAN 0
#endif
#endif
// _wyr8 / _wyr4 read 8 / 4 bytes starting at p and return them as a little-endian
// integer. uint32_t replaces `unsigned` so the 4-byte copy always matches the
// destination size.
#if(WYHASH_LITTLE_ENDIAN) || defined(__TINYC__)
static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v; }
static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return v; }
#elif defined(__GNUC__) || defined(__INTEL_COMPILER)
static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return __builtin_bswap64(v); }
static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return __builtin_bswap32(v); }
#elif defined(_MSC_VER)
static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return _byteswap_uint64(v);}
static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return _byteswap_ulong(v); }
#else
// Portable fallback for any other compiler: assemble the value byte by byte, so the
// readers are always defined and independent of host byte order.
static inline uint64_t _wyr4(const uint8_t *p) { return (uint64_t)p[0]|((uint64_t)p[1]<<8)|((uint64_t)p[2]<<16)|((uint64_t)p[3]<<24); }
static inline uint64_t _wyr8(const uint8_t *p) { return _wyr4(p)|(_wyr4(p+4)<<32); }
#endif
|
||||
// Pack a 1..3 byte input into one word: the first byte, the byte at index k/2 and
// the last byte (index k-1) land in bits 16-23, 8-15 and 0-7 respectively.
// k must be at least 1, otherwise p[k-1] indexes before the buffer.
static inline uint64_t _wyr3(const uint8_t *p, unsigned k) {
  const uint64_t first = p[0];
  const uint64_t middle = p[k >> 1];
  const uint64_t last = p[k - 1];
  return (first << 16) | (middle << 8) | last;
}
|
||||
static inline uint64_t wyhash(const void* key, uint64_t len, uint64_t seed) {
|
||||
const uint8_t *p=(const uint8_t*)key; uint64_t i=len&63;
|
||||
#if defined(__GNUC__) || defined(__INTEL_COMPILER)
|
||||
#define _like_(x) __builtin_expect(x,1)
|
||||
#define _unlike_(x) __builtin_expect(x,0)
|
||||
#else
|
||||
#define _like_(x) (x)
|
||||
#define _unlike_(x) (x)
|
||||
#endif
|
||||
if(_unlike_(!i)) { }
|
||||
else if(_unlike_(i<4)) seed=_wymum(_wyr3(p,i)^seed^_wyp0,seed^_wyp1);
|
||||
else if(_like_(i<=8)) seed=_wymum(_wyr4(p)^seed^_wyp0,_wyr4(p+i-4)^seed^_wyp1);
|
||||
else if(_like_(i<=16)) seed=_wymum(_wyr8(p)^seed^_wyp0,_wyr8(p+i-8)^seed^_wyp1);
|
||||
else if(_like_(i<=24)) seed=_wymum(_wyr8(p)^seed^_wyp0,_wyr8(p+8)^seed^_wyp1)^_wymum(_wyr8(p+i-8)^seed^_wyp2,seed^_wyp3);
|
||||
else if(_like_(i<=32)) seed=_wymum(_wyr8(p)^seed^_wyp0,_wyr8(p+8)^seed^_wyp1)^_wymum(_wyr8(p+16)^seed^_wyp2,_wyr8(p+i-8)^seed^_wyp3);
|
||||
else{ seed=_wymum(_wyr8(p)^seed^_wyp0,_wyr8(p+8)^seed^_wyp1)^_wymum(_wyr8(p+16)^seed^_wyp2,_wyr8(p+24)^seed^_wyp3)^_wymum(_wyr8(p+i-32)^seed^_wyp1,_wyr8(p+i-24)^seed^_wyp2)^_wymum(_wyr8(p+i-16)^seed^_wyp3,_wyr8(p+i-8)^seed^_wyp0); }
|
||||
if(_like_(i==len)) return _wymum(seed,len^_wyp4);
|
||||
uint64_t see1=seed, see2=seed, see3=seed;
|
||||
for(p+=i,i=len-i; _like_(i>=64); i-=64,p+=64) {
|
||||
seed=_wymum(_wyr8(p)^seed^_wyp0,_wyr8(p+8)^seed^_wyp1); see1=_wymum(_wyr8(p+16)^see1^_wyp2,_wyr8(p+24)^see1^_wyp3);
|
||||
see2=_wymum(_wyr8(p+32)^see2^_wyp1,_wyr8(p+40)^see2^_wyp2); see3=_wymum(_wyr8(p+48)^see3^_wyp3,_wyr8(p+56)^see3^_wyp0);
|
||||
}
|
||||
return _wymum(seed^see1^see2,see3^len^_wyp4);
|
||||
}
|
||||
// Hash a pair of 64-bit words: A and B are XORed with _wyp0/_wyp1, multiplied and
// folded by _wymum, and the result is folded once more with _wyp2.
static inline uint64_t wyhash64(uint64_t A, uint64_t B) {
  const uint64_t inner = _wymum(A ^ _wyp0, B ^ _wyp1);
  return _wymum(inner, _wyp2);
}
|
||||
// Pseudo-random generator step: advances *seed by _wyp0 (wrapping) and returns
// the updated state mixed with itself through _wymum.
static inline uint64_t wyrand(uint64_t *seed) {
  const uint64_t next = *seed + _wyp0;
  *seed = next;
  return _wymum(next ^ _wyp1, next);
}
|
||||
// Map a 64-bit random value to a double in [0, 1).
// The top 53 bits of r are kept (r>>11) and scaled by 2^-53; the previous 2^-52
// scale let the result reach almost 2.0.
static inline double wy2u01(uint64_t r) { const double _wynorm=1.0/(1ull<<53); return (r>>11)*_wynorm; }
|
||||
// Sum three 21-bit fields of r, scale the sum by 2^-20 and subtract 3.0, giving a
// value in [-3, 3). Presumably intended as a cheap Gaussian approximation (per the name).
static inline double wy2gau(uint64_t r) {
  const uint64_t mask21 = 0x1fffff;
  const uint64_t sum = (r & mask21) + ((r >> 21) & mask21) + ((r >> 42) & mask21);
  const double scale = 1.0 / (1ull << 20);
  return sum * scale - 3.0;
}
|
||||
#endif
|
55
tools/bench/wyhash.v
Normal file
55
tools/bench/wyhash.v
Normal file
@ -0,0 +1,55 @@
|
||||
module main
|
||||
|
||||
import (
|
||||
hash.fnv1a
|
||||
hash.wyhash
|
||||
rand
|
||||
time
|
||||
)
|
||||
|
||||
// main generates a pile of random bytes, carves it into sample_size strings and
// times three 64-bit hashes over the same strings: the C wyhash, the V wyhash port
// and FNV-1a. Elapsed times are printed in milliseconds.
fn main() {
	sample_size := 10000000
	min_str_len := 20
	max_str_len := 40
	println('Generating $sample_size strings between $min_str_len - $max_str_len chars long...')
	mut bytepile := []byte
	for _ in 0 .. sample_size * max_str_len {
		bytepile << byte(40 + rand.next(125 - 40))
	}
	mut str_lens := []int
	for _ in 0 .. sample_size {
		str_lens << min_str_len + rand.next(max_str_len - min_str_len)
	}
	println('Hashing each of the generated strings...')
	t0 := time.ticks()
	mut start_pos := 0
	for len in str_lens {
		end_pos := start_pos + len
		str := string(bytepile[start_pos..end_pos],len)
		// Pass the string's byte pointer itself; `&str.str` would hand the C code the
		// address of the pointer field and make it hash unrelated memory.
		_ = wyhash.wyhash_c(str.str, u64(str.len), 1)
		start_pos = end_pos
	}
	t1 := time.ticks()
	d1 := t1 - t0
	println(' * wyhash4 C: ${d1}ms')
	start_pos = 0
	for len in str_lens {
		end_pos := start_pos + len
		str := string(bytepile[start_pos..end_pos],len)
		_ = wyhash.sum64_string(str, 1)
		start_pos = end_pos
	}
	t2 := time.ticks()
	d2 := t2 - t1
	println(' * wyhash4: ${d2}ms')
	start_pos = 0
	for len in str_lens {
		end_pos := start_pos + len
		str := string(bytepile[start_pos..end_pos],len)
		_ = fnv1a.sum64_string(str)
		start_pos = end_pos
	}
	t3 := time.ticks()
	d3 := t3 - t2
	println(' * fnv1a64: ${d3}ms')
}
|
@ -159,6 +159,13 @@ pub fn (n i64) hex() string {
|
||||
return tos(hex, count)
|
||||
}
|
||||
|
||||
// hex returns n formatted as lowercase hexadecimal with a '0x' prefix.
pub fn (n u64) hex() string {
	// '0x' + at most 16 hex digits + terminating NUL. The previous `n >= u64(0)` test
	// was always true for an unsigned value and built a decimal string just to size the buffer.
	len := 19
	hex := malloc(len)
	count := int(C.sprintf(charptr(hex), '0x%'C.PRIx64, n))
	return tos(hex, count)
}
|
||||
|
||||
pub fn (a []byte) contains(val byte) bool {
|
||||
for aa in a {
|
||||
if aa == val {
|
||||
|
44
vlib/hash/fnv1a/fnv1a.v
Normal file
44
vlib/hash/fnv1a/fnv1a.v
Normal file
@ -0,0 +1,44 @@
|
||||
module fnv1a
|
||||
|
||||
// FNV-1a parameters. All four are explicitly typed so the 64-bit values (the offset
// basis does not fit in a signed 64-bit integer) never depend on literal type inference.
const (
	fnv64_prime = u64(1099511628211)
	fnv64_offset_basis = u64(14695981039346656037)
	fnv32_offset_basis = u32(2166136261)
	fnv32_prime = u32(16777619)
)
|
||||
|
||||
// sum32_string returns the 32-bit FNV-1a hash of the bytes of `data`:
// starting from the offset basis, each byte is XORed in and the result multiplied by the prime.
[inline]
pub fn sum32_string(data string) u32 {
	mut acc := fnv32_offset_basis
	mut pos := 0
	for pos < data.len {
		mixed := acc ^ u32(data[pos])
		acc = mixed * fnv32_prime
		pos++
	}
	return acc
}
|
||||
|
||||
// sum32 returns the 32-bit FNV-1a hash of the byte array `data`:
// starting from the offset basis, each byte is XORed in and the result multiplied by the prime.
[inline]
pub fn sum32(data []byte) u32 {
	mut acc := fnv32_offset_basis
	for b in data {
		mixed := acc ^ u32(b)
		acc = mixed * fnv32_prime
	}
	return acc
}
|
||||
|
||||
// sum64_string returns the 64-bit FNV-1a hash of the bytes of `data`:
// starting from the offset basis, each byte is XORed in and the result multiplied by the prime.
[inline]
pub fn sum64_string(data string) u64 {
	mut acc := fnv64_offset_basis
	mut pos := 0
	for pos < data.len {
		mixed := acc ^ u64(data[pos])
		acc = mixed * fnv64_prime
		pos++
	}
	return acc
}
|
||||
|
||||
// sum64 returns the 64-bit FNV-1a hash of the byte array `data`:
// starting from the offset basis, each byte is XORed in and the result multiplied by the prime.
[inline]
pub fn sum64(data []byte) u64 {
	mut acc := fnv64_offset_basis
	for b in data {
		mixed := acc ^ u64(b)
		acc = mixed * fnv64_prime
	}
	return acc
}
|
9
vlib/hash/fnv1a/fnv1a_test.v
Normal file
9
vlib/hash/fnv1a/fnv1a_test.v
Normal file
@ -0,0 +1,9 @@
|
||||
import hash.fnv1a
|
||||
|
||||
// test_fnv1a checks that the string and byte-array 64-bit entry points agree on a known digest.
fn test_fnv1a() {
	input := 'apple'
	expected := '0xf74a62a458befdbf'
	from_string := fnv1a.sum64_string(input)
	from_bytes := fnv1a.sum64(input.bytes())
	assert from_string.hex() == expected
	assert from_bytes.hex() == expected
}
|
129
vlib/hash/wyhash/wyhash.v
Normal file
129
vlib/hash/wyhash/wyhash.v
Normal file
@ -0,0 +1,129 @@
|
||||
// Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
|
||||
// Use of this source code is governed by an MIT license
|
||||
// that can be found in the LICENSE file.
|
||||
//
|
||||
// this is an implementation of wyhash v4
|
||||
// from https://github.com/wangyi-fudan/wyhash
|
||||
//
|
||||
// TODO: use u128 once implemented
|
||||
// currently the C version performs slightly better
|
||||
// because it uses 128 bit int when available and
|
||||
// branch prediction hints. the C version will be
|
||||
// removed once the performance is matched.
|
||||
// you can test performance by running:
|
||||
// v run tools/bench/wyhash.v
|
||||
// try running with and without the -prod flag
|
||||
module wyhash
|
||||
|
||||
#flag -I @VROOT/thirdparty/wyhash
|
||||
#include "wyhash.h"
|
||||
fn C.wyhash(byteptr, u64, u64) u64
|
||||
|
||||
|
||||
// Mixing constants; the values match _wyp0.._wyp4 in thirdparty/wyhash/wyhash.h.
// Typed explicitly as u64 because several exceed the signed 64-bit range.
const (
	wyp0 = u64(0xa0761d6478bd642f)
	wyp1 = u64(0xe7037ed1a0b428db)
	wyp2 = u64(0x8ebc6af09c88c6e3)
	wyp3 = u64(0x589965cc75374cc3)
	wyp4 = u64(0x1d8e4e27c47d124f)
)
|
||||
|
||||
// wyhash_c hashes `len` bytes at `key` with `seed` by calling the C `wyhash`
// declared in wyhash.h.
[inline]
pub fn wyhash_c(key byteptr, len, seed u64) u64 {
	digest := C.wyhash(key, len, seed)
	return digest
}
|
||||
|
||||
// sum64_string hashes the bytes of `key` with `seed` using the V implementation (wyhash64).
[inline]
pub fn sum64_string(key string, seed u64) u64 {
	n := u64(key.len)
	return wyhash64(key.str, n, seed)
}
|
||||
|
||||
// sum64 hashes the byte array `key` with `seed` using the V implementation (wyhash64).
[inline]
pub fn sum64(key []byte, seed u64) u64 {
	n := u64(key.len)
	return wyhash64(key.data, n, seed)
}
|
||||
|
||||
// wyhash64 is the V port of the C `wyhash`: it hashes `len` bytes starting at `key`
// with `seed_`. The leading `len & 63` bytes are mixed first with a branch chosen by
// their count; the rest is consumed in 64-byte blocks using four accumulators.
// NOTE(review): an empty input returns 0 for every seed, whereas the C version returns
// _wymum(seed, _wyp4) - kept as-is here; confirm whether that divergence is intended.
[inline]
fn wyhash64(key byteptr, len, seed_ u64) u64 {
	if len == 0 {
		return 0
	}
	mut p := &key[0]
	mut seed := seed_
	mut i := len & 63
	// i == 0 means len is a non-zero multiple of 64: there are no leading bytes to mix.
	// Without this guard the `i < 4` branch ran wyr3(p, 0), which reads p[k - 1] with
	// k == 0, i.e. outside the buffer, and produced a hash different from the C version.
	if i != 0 {
		if i < 4 {
			seed = wymum(wyr3(p, i) ^ seed ^ wyp0, seed ^ wyp1)
		}
		else if i <= 8 {
			seed = wymum(wyr4(p) ^ seed ^ wyp0, wyr4(p + i - 4) ^ seed ^ wyp1)
		}
		else if i <= 16 {
			seed = wymum(wyr8(p) ^ seed ^ wyp0, wyr8(p + i - 8) ^ seed ^ wyp1)
		}
		else if i <= 24 {
			seed = wymum(wyr8(p) ^ seed ^ wyp0, wyr8(p + 8) ^ seed ^ wyp1) ^ wymum(wyr8(p + i - 8) ^ seed ^ wyp2, seed ^ wyp3)
		}
		else if i <= 32 {
			seed = wymum(wyr8(p) ^ seed ^ wyp0, wyr8(p + 8) ^ seed ^ wyp1) ^ wymum(wyr8(p + 16) ^ seed ^ wyp2, wyr8(p + i - 8) ^ seed ^ wyp3)
		}
		else {
			seed = wymum(wyr8(p) ^ seed ^ wyp0, wyr8(p + 8) ^ seed ^ wyp1) ^ wymum(wyr8(p + 16) ^ seed ^ wyp2, wyr8(p + 24) ^ seed ^ wyp3) ^ wymum(wyr8(p + i - 32) ^ seed ^ wyp1, wyr8(p + i - 24) ^ seed ^ wyp2) ^ wymum(wyr8(p + i - 16) ^ seed ^ wyp3, wyr8(p + i - 8) ^ seed ^ wyp0)
		}
	}
	// Inputs shorter than 64 bytes are finished here.
	if i == len {
		return wymum(seed, len ^ wyp4)
	}
	mut see1 := seed
	mut see2 := seed
	mut see3 := seed
	p = p + i
	for i = len - i; i >= 64; i -= 64 {
		seed = wymum(wyr8(p) ^ seed ^ wyp0, wyr8(p + 8) ^ seed ^ wyp1)
		see1 = wymum(wyr8(p + 16) ^ see1 ^ wyp2, wyr8(p + 24) ^ see1 ^ wyp3)
		see2 = wymum(wyr8(p + 32) ^ see2 ^ wyp1, wyr8(p + 40) ^ see2 ^ wyp2)
		see3 = wymum(wyr8(p + 48) ^ see3 ^ wyp3, wyr8(p + 56) ^ see3 ^ wyp0)
		p = p + 64
	}
	return wymum(seed ^ see1 ^ see2, see3 ^ len ^ wyp4)
}
|
||||
|
||||
// wyrotr rotates the 64-bit value v right by k bits.
[inline]
fn wyrotr(v u64, k u32) u64 {
	right := v>>k
	left := v<<(64 - k)
	return right | left
}
|
||||
|
||||
// wymum multiplies a by b as a full 128-bit product and returns the XOR of its high
// and low 64-bit halves. The high half is assembled from 32-bit partial products.
// TODO: once u128 exists this reduces to: r := u128(a) * b; return (r>>64) ^ r
[inline]
fn wymum(a, b u64) u64 {
	low32 := u32(4294967295)
	a_lo := a & low32
	a_hi := a>>32
	b_lo := b & low32
	b_hi := b>>32
	lo_lo := a_lo * b_lo
	cross := a_hi * b_lo + (lo_lo>>32)
	cross_hi := cross>>32
	mid := (cross & low32) + a_lo * b_hi
	hi := a_hi * b_hi + cross_hi + (mid>>32)
	lo := a * b
	return hi ^ lo
}
|
||||
|
||||
// wyr3 packs a 1..3 byte input into one word: the first byte, the byte at index k/2
// and the last byte (index k-1) land in bits 16-23, 8-15 and 0-7 respectively.
[inline]
fn wyr3(p byteptr, k u64) u64 {
	first := u64(p[0])
	middle := u64(p[k>>1])
	last := u64(p[k - 1])
	return (first<<16) | (middle<<8) | last
}
|
||||
|
||||
// wyr4 reads 4 bytes at p as a little-endian 32-bit value, widened to u64.
[inline]
fn wyr4(p byteptr) u64 {
	b0 := u32(p[0])
	b1 := u32(p[1])<<u32(8)
	b2 := u32(p[2])<<u32(16)
	b3 := u32(p[3])<<u32(24)
	return b0 | b1 | b2 | b3
}
|
||||
|
||||
// wyr8 reads 8 bytes at p as a little-endian 64-bit value.
[inline]
fn wyr8(p byteptr) u64 {
	lo := u64(p[0]) | (u64(p[1])<<8) | (u64(p[2])<<16) | (u64(p[3])<<24)
	hi := u64(p[4]) | (u64(p[5])<<8) | (u64(p[6])<<16) | (u64(p[7])<<24)
	return lo | (hi<<32)
}
|
29
vlib/hash/wyhash/wyhash_test.v
Normal file
29
vlib/hash/wyhash/wyhash_test.v
Normal file
@ -0,0 +1,29 @@
|
||||
import hash.wyhash
|
||||
|
||||
// WyHashTest is one table entry for test_wyhash.
// Field order matters: the table initialises entries positionally.
struct WyHashTest {
	s        string // input to hash
	seed     u64 // seed passed to the hash
	expected u64 // digest the hash must return
}
|
||||
|
||||
// test_wyhash runs wyhash.sum64 over a table of inputs and seeds and compares each
// digest with its recorded value.
fn test_wyhash() {
	cases := [
		WyHashTest{s: '', seed: 0, expected: 0x0},
		WyHashTest{s: 'v', seed: 1, expected: 0xc72a8f8bdfdd82},
		WyHashTest{s: 'is', seed: 2, expected: 0xa1099c1c58fc13e},
		WyHashTest{s: 'the best', seed: 3, expected: 0x1b1215ef0b0b94c},
		WyHashTest{s: 'abcdefghijklmnopqrstuvwxyz', seed: 4, expected: 0x6db0e773d1503fac},
		WyHashTest{s: 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', seed: 5, expected: 0xe062dfda99413626},
	]
	for tc in cases {
		actual := wyhash.sum64(tc.s.bytes(), tc.seed)
		// println(' # GOT: $actual | $actual.hex()')
		// println(' # EXPECTED: $tc.expected | $tc.expected.hex()')
		assert actual == tc.expected
	}
}
|
Loading…
Reference in New Issue
Block a user